diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1dee50abd83e3d23d2edfb2aa5c69d94fedcab5c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.jsonl.tar.gz filter=lfs diff=lfs merge=lfs -text diff --git a/eval-results/arc_challenge/25/ckpt_003/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_003/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2915cb7ec2037de5f57425858c4d3d0084f59e70 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_003/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09427034954ac308622fa4f7a9150905d6176d7e836163af90405178d46f4b1b +size 2267278 diff --git a/eval-results/arc_challenge/25/ckpt_003/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdccc3e1ccda2c7cd618d8858ccf4c60f813ae1e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ef12426cfce4d1d2368739e717da24079000e9a3cef1d7344978cfffb52f937 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_006/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_006/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dae02f87e4e86b4d95f4981eaaf5bb02ce583a17 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_006/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95c30af4e510049c3caa20681f0d6eb6d0323d3085535174a990a831ea2e11b4 +size 2267835 diff --git a/eval-results/arc_challenge/25/ckpt_006/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc618d2ff29b78d086a3352307f49fc8d71d76c0 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73f77bf9496749d93f979295c98537113b323379e875d727f38e5c08b3ae501 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_009/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_009/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c5fc1cdcf345eea94e3b3d173e0dcbb7dc7b1f6 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_009/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e12e3e33e6233fc6d25d6199e7ccfdee658772eef6f9df518f2d838943880cb1 +size 2268278 diff --git a/eval-results/arc_challenge/25/ckpt_009/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c5ee0b5e08caee2d0f23c397559e7b9926bdc66 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c11eb41f363f5467c6f3b64dd21ec1dd1a022be511d088c86bf0371bd0d2f4 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_012/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_012/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b90b9481d730bf58f645a92bd2eb9b94ab32a118 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_012/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f960c892a55eafa725e29e4b383aff11119d3380ddfed868684f832f7d7645ed +size 2268579 diff --git a/eval-results/arc_challenge/25/ckpt_012/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f23120cd8dd52babc74c2dc5522bff232307795d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b5bef1b92c17fc57cda387c9fcda1479adfcb682c97f7c6d972102ac7774d46 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_015/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_015/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7515a5bdbe83e980268fd8b6d3b8c6858974e2b7 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_015/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980f5c264ba7a4775d8978b9e987fcdf2d810b1adb48e79b777d1bbeca05eb19 +size 2268914 diff --git a/eval-results/arc_challenge/25/ckpt_015/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8027bfe27392316792bedbc7d922a0000f5cc26e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a1883ce9b3e0dd2aa015974554d6c04467d67ae2de99b08192c353e9d2277dc +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_018/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_018/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8e9057f71ba82bb860205e1bca0b61eb4941359 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_018/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c430d1d41fcdb8600ece38265a37e877759ed56b0af89042428572563bc63ca +size 2269072 diff --git a/eval-results/arc_challenge/25/ckpt_018/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ec8b6c458fff435365f355a8f12e0828b8a767d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee8eaf18e3d78f8dc6b1360c9de8feef6939ede3231b7b3ee12ecac10c104a9 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_021/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_021/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2e79e792125cdd191ade169f683538fa2eb484c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_021/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b41e2595fdbd6dd8ec0c4c7b2a5ac23afb8543c05c6b19ed5d5d2ddf614f5ffa +size 2268834 diff --git a/eval-results/arc_challenge/25/ckpt_021/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe01190efa0a0ff8c3ebf7286a5aba88f8b2c951 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7209333a0a7df56db43df479659f29eb315ceb5f1a7a6c75a3944600f130e677 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_024/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_024/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bf60a8eabecb8657e8fb4d0ea1bd6f4126099f7 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_024/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f9a4714f1855c5349bc378e34dbbd4002aa8d13673ab85f791a142a0bfff34 +size 2268988 diff --git a/eval-results/arc_challenge/25/ckpt_024/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfe72e80ec3ba6f4d1f034b0da1e08d32a239469 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f530ad88a2fc363fda6292bc40f6bd3d9ee1afb054ed896ddd8c202d718df7a4 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_027/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_027/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df37990de9a8e5a720757794896806ae4309da06 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_027/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5980e929f0e2c2463b0a7bd7e0d82bc72371fd15cd90bc7757c1448bfc469668 +size 2268923 diff --git a/eval-results/arc_challenge/25/ckpt_027/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba9c438fe09ab90b0c3515432e3a4e9bcea5f8ea --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82c98599c13896c22a4a4c64473419dc3abdcc2146abfbda6e9488c5091122fd +size 2715 diff --git a/eval-results/arc_challenge/25/ckpt_030/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_030/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80cca76c7526619bb9b6eba131624016c7eea81c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_030/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19466827850547ee1035c5ba148843a2ee808366f4c0b6102a9d79f3424952d +size 2269233 diff --git a/eval-results/arc_challenge/25/ckpt_030/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02192802994dda622202a41a7465ea760eaaddba --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027487c56d13c93fb50de2b807353fd631ace6ccb088210cf95f942557160bd5 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_033/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_033/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27cae5a156c208fb69c667d2dff29839d3188a69 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_033/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b1a2eb36c241fe2f486c6e1dd5ba800e704f6d296e46d4f86295a672aa5624c +size 2269431 diff --git a/eval-results/arc_challenge/25/ckpt_033/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..956039ce1ccf8789235c7baf022110a03827911f --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeebfaa78a4314186e687f7a856ec5b3f7f4c881817c477dc8c434f4b21fbc6d +size 2722 diff --git a/eval-results/arc_challenge/25/ckpt_036/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_036/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e9a8dcd9a455c9929ea07c5898a5db6b1cba044 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_036/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b841343cf5930896d3cbb976a81ed859258d0c8510cbab4bd848bcdf9407c5f +size 2269380 diff --git a/eval-results/arc_challenge/25/ckpt_036/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..baecf5b11d7807df1c6a49231f1e30a73f933405 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad539fd48e48a889dfcf0cdd862fd639f850e8bf2b6bc9735cf4001b1b0aecf +size 2716 diff --git a/eval-results/arc_challenge/25/ckpt_039/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_039/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc376cc67b3c4d653a2f0e155d201bd414bd184e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_039/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:372bb01964b106934e6c9fd7eff34b59b06ee9cf7d27da775f82ec0f61c14826 +size 2268990 diff --git a/eval-results/arc_challenge/25/ckpt_039/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1893cd4603c87edc6e09d334592d9051a168c1e9 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebc5863898f543faf4f753edcbb49d1b74b7e5786686c42b858574eca808d958 +size 2722 diff --git a/eval-results/arc_challenge/25/ckpt_042/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_042/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa7e1d896f4de6ddba64ef52db1ee2e2121c461d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_042/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9768098f0696990e80e0b58ecc3e8bca1f56341ba862b541c20235e7e88054f7 +size 2269356 diff --git a/eval-results/arc_challenge/25/ckpt_042/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..200b728415afa0d02eba1bed976f82208d057fc6 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24600376e56fbddee18946269202c55c8314354bd2df0c5bcbd82fd7693c4c73 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_045/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_045/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c75134f82c7c7269e205294bf4d398a2db64d41 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_045/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ae8a45fe3e5d87305192c0630cc55f19e81cea981afd65c680f99c7059ac5b +size 2269312 diff --git a/eval-results/arc_challenge/25/ckpt_045/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd75665421bb029d63f5da0d1605e970e40f408c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1da81dbefa13440dc0854a81b04d68d0975686635a8fcfef88c80e5ead8d967 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_048/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_048/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be384e71cb640e2f53e652a1c4d40846092fa047 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_048/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab961c0ad4106f1acd4b2d133b13a9b07cdc8f9a19f49a1c2b6e928dd393492a +size 2269394 diff --git a/eval-results/arc_challenge/25/ckpt_048/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6023ebf79de31a74e2ee7822eda55ba7cc7cd167 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38161df9aa555ca8ece4602f6aefd3752b065eae1e45ba9236288948d59cb7fe +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_051/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_051/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0afb88d74675a0aac7f27b3b8989a1b5bceb8ad --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_051/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d2da45810d92eec9f94e967107fcf5fa9bf2c9459d95781a6e7e837db8082bb +size 2269532 diff --git a/eval-results/arc_challenge/25/ckpt_051/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..724c7b035e4029049f4ca3f56898b4cea5a24ba9 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:061c9e4d5f4adf23105323a16ffb5a8be51fffd44f2f483bee3aec66937b476c +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_054/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_054/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..697e79f93c08f762e067265326977a2b3a167aff --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_054/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3ecf947554cee711f65998be35ef4b57a175f12af949fe126babf7ae7187dd +size 2269331 diff --git a/eval-results/arc_challenge/25/ckpt_054/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a7698c781bad17a2879566f1a6a5bb04b34cadd --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5da23887c2d28cd5cdcc66011b843ff213914c0533a741856715266664400792 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_057/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_057/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..279a7c7fd2b836de85c0ea1eac518d922819f38b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_057/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c97349dd93853dd0c9e9db347dad323e505da463be35575056e43060b8c1819 +size 2269430 diff --git a/eval-results/arc_challenge/25/ckpt_057/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9db14b9977e0e1454dfa856fd363746b99f12c96 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8793fd5747d503d203ad567b995e169da6ad36f950b4aa3e585d99d0252c5bf4 +size 2709 diff --git a/eval-results/arc_challenge/25/ckpt_060/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_060/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62499e583786634a1ec1501733c9e97d1ce08543 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_060/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf5ee6c91a7903060f2723655373ae962e686d76f5d831a8dc4ce655e6c651ac +size 2269532 diff --git a/eval-results/arc_challenge/25/ckpt_060/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d565a69e7c8f1196d7bf585d4c6c9ece901393de --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be6429ceed80abb184595b7aca9deb1cfe250f4a94ff5628b1319b8c52122ba +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_063/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_063/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd6f3fd9742631a062e3804e9dca83c3540b41b3 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_063/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14dc09d0ab66e6af7f72d13d6b10e446005696908c6ef9a4a57870ab231faa04 +size 2269441 diff --git a/eval-results/arc_challenge/25/ckpt_063/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0faf99d67fc7558fa3852833136e221d9a9fcefb --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:983212dfbcb6ec4850284288a499a65d9b2978f15fbefb20f607781c3f007330 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_066/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_066/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d33876bc2d62b1df0c5e4708bb23baf47d4223ae --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_066/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c27788dd79df3ec9ecd326e45caa66e3a160dd07a055592923a5a1d19134ffe7 +size 2269529 diff --git a/eval-results/arc_challenge/25/ckpt_066/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9e0322d2e81b23d53764484077dbad8c8efb75c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f148fdbef3021a2178f8d4024cedb83c959dfef7b15590d630cb1b034d0a1ab +size 2716 diff --git a/eval-results/arc_challenge/25/ckpt_069/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_069/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7adbdbd5641c31def739856e2a47d4d1bb17f98 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_069/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e86a0f0246bbcc87f8f075662eabaf32ba4681454dd1abcdd47b02284bf7933c +size 2269509 diff --git a/eval-results/arc_challenge/25/ckpt_069/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43044037f615f3e4c7ac6d181ed794bbcfc18b0b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bfc3f501c267431143562f31ad554d1a57c752bbe1793e3bc86e260fbed890e +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_072/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_072/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02a4ba12462f10cfceafe4e41ac4e4482fe6a1e8 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_072/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711988ec65c8789b75873f0a5198d83307e0cf03c21b199ded3bfa75cb036059 +size 2269606 diff --git a/eval-results/arc_challenge/25/ckpt_072/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5d3a46d0ba87e477df37135dc688bcb5f348240 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd486f869a29980a766a5967f3f3a22238b6c283f089b45e7c9b5a6d96a34b30 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_075/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_075/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67ed9e6e64cdfd7868227b93e6371d54065772e0 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_075/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1cfa259dde58cc3a99241e8b517866b2294aaad74e15a0c48e45975cf0b3dfe +size 2269367 diff --git a/eval-results/arc_challenge/25/ckpt_075/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f228c7e196d0bb01cd343a5abb3015b92912870 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be8b58d827027854992fae0c971abaaaaa7ecaa20d6cd526505250ec78fbde5 +size 2747 diff --git a/eval-results/arc_challenge/25/ckpt_078/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_078/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12edae44b7cf4f8b2ea22f01c36989ed5ce0178c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_078/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07973272321be7b14c927f81525bf36b5cd55a63044593b4613ef9ba20a6ef7 +size 2269609 diff --git a/eval-results/arc_challenge/25/ckpt_078/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f989e4578257436896a4464e36e1361453f336a1 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04aa8fccb8a69a9f4d3963aacc3498df8114aa0b86c188beb34e3a2a8eea0c12 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_081/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_081/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98a482a2086d8dffead7428e3ea0c701c40d072d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_081/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:facf99bab9d2325848a73537b1b06d851e3fd5d0147833aa10e7dad8f5f03b57 +size 2269696 diff --git a/eval-results/arc_challenge/25/ckpt_081/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f444012b960fa3de70cb405dece422a7153e831c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9f4d9cd778c3e2a5375b6553b7db2a9bfa5a195fbad46c7b1726e942f322a5 +size 2709 diff --git a/eval-results/arc_challenge/25/ckpt_084/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_084/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b636e1a8a2e37e48167d45a7814a23f9752e831 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_084/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0de4d2fae974c2a9db0b5a3c290750b3335f3bb96829334c685306e692440ecb +size 2269450 diff --git a/eval-results/arc_challenge/25/ckpt_084/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b8024f3bfb5e3d806f1d2f83fcd870c2e3dc07c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c707d9f0c4da3bc8fa160884e8a362fd03834f12453d0f8ae6a4595352e586 +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_087/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_087/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8d0937818ffbf0bcb38584b7f84bfe587bca50d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_087/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f276ef1f2d3663e3d7ca16f42a69d7e5de046aac77b143cd0c503c13dfeb6e +size 2269469 diff --git a/eval-results/arc_challenge/25/ckpt_087/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9bd9eef118b7431792fb3f6101835bbe3a73132 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0869be9bb84721ed9af5908df6a639a6f5b1fa3126cb1c2eac0db748ef62d51a +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_090/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_090/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61fd95d97dd0f7032ea635f641a87f6375bc9a6c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_090/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8334e26a375cca10c7207cce0c210563bc109821976e60c80532d6e4b740a1 +size 2269490 diff --git a/eval-results/arc_challenge/25/ckpt_090/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fe23fd0ea08c699d587efa35622462c33eab716 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e0437ffa22e9792f784496e76c6141b5192f40fbf33568c63d9d76fb3c3f6a +size 2746 diff --git a/eval-results/arc_challenge/25/ckpt_093/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_093/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2250130c8d4f0943258fa4fc4d5b41bea9933bb --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_093/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aab44f4214430046da58fd1e46f605d507e63c963106397b2de34ca1395c978 +size 2269440 diff --git a/eval-results/arc_challenge/25/ckpt_093/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e12ffd2b0f2a5a0ae549bdeb8012b4d8eb576960 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd1fd02418e0f633f09da9e04ade32bd7736fd300f777b52595eb0145c7be596 +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_096/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_096/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3ab7a72830c16e11cbdc5488b826d2f250111a8 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_096/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b386309f861655b77483ffc49b6671879af6a15d5eb8a418114ed744f943856c +size 2269626 diff --git a/eval-results/arc_challenge/25/ckpt_096/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3286db156b6184e607544785a7264fbe2744f230 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ec6ab7ce02eee13a48ee2e558c8d79917f327293d13d9000604e6ca7491cae +size 2747 diff --git a/eval-results/arc_challenge/25/ckpt_099/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_099/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab78ea45cdacbe3665e5b89c26a7a18ba66c4110 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_099/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a42f94d709ea33174e2c31aa8ddfb8baedb0214ceda4e27ed5dfe9e46e952b05 +size 2269460 diff --git a/eval-results/arc_challenge/25/ckpt_099/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a81fff14ce8eef0293499b8667c9654342c5af9d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311c63b38430d5bdf25e861f28da509ea3cec377678af6a9869c3f58a306f65d +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_102/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_102/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5fc7a45f994161b70283aa9450a504353a52ce9 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_102/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3006c07c103f6cfafd7b08fab80a22cf7c6d6b6de5471f9fe1b60b73531b7f79 +size 2269396 diff --git a/eval-results/arc_challenge/25/ckpt_102/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..032447b04c83856022ed6e33814710da29271950 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c6af415354cec34114ed334bb5f55c113e03a362d6077911bdbc8eea7dc5a9 +size 2747 diff --git a/eval-results/arc_challenge/25/ckpt_105/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_105/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3630e16a0df69317f9717d06c041343159d94797 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_105/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5be6af7423dd49a1cb31e3fae4ee62ebd7086a237a360458b16b5ab59df5500 +size 2269705 diff --git a/eval-results/arc_challenge/25/ckpt_105/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15b0acd9bcf5cd7e8d6e309be6f4f3716e6cb496 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b34ed7377ea6fa37fe1ca185ce7fb9287a8d9274f9b5503bb404b395f220f623 +size 2714 diff --git a/eval-results/arc_challenge/25/ckpt_108/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_108/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4291bd3905a30e09c9a4e061c6412ba656dab37e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_108/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d3e8e759f3adf8c21bda3a470117788a716f7a580457d3c236e1cca03e433e1 +size 2269325 diff --git a/eval-results/arc_challenge/25/ckpt_108/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74e86ea898925d741a0eb366e12ee3b59dcbbdb5 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cbdc1cd4e8389309b01ea3aff90ae1ba4f2be8a4adef97920513f1eebe487e3 +size 2712 diff --git a/eval-results/arc_challenge/25/ckpt_111/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_111/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4aa6faf8d9e4ad4af71168653ed841a56c2faa12 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_111/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e8dba71c994f029886e40f37d4fef72d33080a3a0b6bbf6e2fe9474f83f21f +size 2269436 diff --git a/eval-results/arc_challenge/25/ckpt_111/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9bd96230f3287e1bf5b06ba4345e625d83d3e79 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9c1242519aca5d9290f9ba69735168e0e716e8173fc8503b9ca9f4caff30782 +size 2746 diff --git a/eval-results/arc_challenge/25/ckpt_114/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_114/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ec3b1238c0f1c427246d42d5e3585d66ddd4e01 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_114/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4bb0be10cc47ff8d576ddc54b432a9640b47fdb57802bac3cb8a309cd4da0d +size 2269834 diff --git a/eval-results/arc_challenge/25/ckpt_114/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dad619e66c9d73a0648d75e77c0eb278d2931a6e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c779831ffe9d782429d5d854aaa17887d7b523aa6ec4e701ca9e9f2d37798a32 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_117/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_117/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73981f8dfc4923a60178329bc6a9be6d04f707af --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_117/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d06d9d517c58d1d84760d6ec700aa38069d4ae6e29a0d894b15dc479eba469e +size 2269677 diff --git a/eval-results/arc_challenge/25/ckpt_117/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1834ca68368059341cf59ae477e653fedc22aa6 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa8507ceb0e6caf657e739d2e85a3a11fec904ab65314753a518090f84ee78a +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_120/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_120/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79714139930688c2ece203404bfac8be90b64986 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_120/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e8b553a78a6dc33282a03a557767c7a269a818c17def12183c8228fc45c0e0 +size 2269591 diff --git a/eval-results/arc_challenge/25/ckpt_120/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..626a68a77645e4eba6b653889724e57133e8cc4e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:438cba3252f186e3386d9d89f52cb2678b2e666ee3edaa6ea614f43ccdfbfe9c +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_123/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_123/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f45fb401272d35f0e26951674e518d02b131067d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_123/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85af2d9b0d01254590296a4ed38d1c3dc06491a34c030a8d68bd25b5b401e3d7 +size 2269710 diff --git a/eval-results/arc_challenge/25/ckpt_123/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8c1b3f85fc69bf077bba05b4d7273cb80e1cca5 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3764068c6cd446036d7c12b6a584a969e91e2a864e0bf797b0eed990c7093d3d +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_126/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_126/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdd262ac95f7e5a08f3835f5706e77fca3231702 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_126/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d09c8e809c21da76e303cb2bb108777fc627096cc91b66ad41b8a685a81c84c +size 2269617 diff --git a/eval-results/arc_challenge/25/ckpt_126/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f12e3815cbd14d44f502267ffa61619bf85c985 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5561513a6748398c8fe84e2184a0b47be15b70a1bd5f7e58175570509edd61e +size 2747 diff --git a/eval-results/arc_challenge/25/ckpt_129/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_129/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f9e006a6e7be7718540858246c1270f7ffd36e1 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_129/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ee40a9e1cc12d9514800410008f4940d99f33241a63fb8a32f8cc74dac3baf +size 2269362 diff --git a/eval-results/arc_challenge/25/ckpt_129/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb2e240dcee282bb5e3c2b40a980bc54ab413e09 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbde0c5393d1fd53de69f2d41a208faf637b1f6bc9b5ea50e0475fc7dd73cccf +size 2714 diff --git a/eval-results/arc_challenge/25/ckpt_132/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_132/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eef881f9b0a04547f3c7d2517abc7a96042c176c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_132/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91bfac2b12e685bad3e9e9bc9098ccff708e7d3ed802dc363a92455140f916a2 +size 2269712 diff --git a/eval-results/arc_challenge/25/ckpt_132/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48f5044ba92255c454b0e847b671ef8c33b87da7 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bfe82230fd4632957feb0df0bb35d6226ba17d2196418434fe5dc9e75b98b55 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_135/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_135/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa4768c01fefd9a468da0e511126f7953dd7d575 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_135/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3338a6fb331b37388d825e3b0a780df03690c05a71b8143fa309ea3defed9ba +size 2269702 diff --git a/eval-results/arc_challenge/25/ckpt_135/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..369de3f9373051605a9ac1430daab6ee764a629f --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e72dd626da5162a67d3b5c3a38726e14a93dae40d5d3d07642faa61a263f825 +size 2744 diff --git a/eval-results/arc_challenge/25/ckpt_138/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_138/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51e6f2bef9fc6afa90e3ac73856827106c4fdc65 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_138/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4baa0a02c5a02d9971a62e84f85c85d2d51c93e967c6c7c1fcad5bb7d864036c +size 2269625 diff --git a/eval-results/arc_challenge/25/ckpt_138/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a77e9ee19b8c827f9bc31b49797d077e1e2168c3 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529ec6cd53355565d0415aa63929811bcf4ec9de4dd8650e8fce1fa795848baf +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_141/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_141/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01d2a9cf314a29cf9a24ea7d22bf8d1aea80c697 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_141/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be5a7b84d8e55de0511af91f121e69c52a3695b503326aff7a6791f9c729b55 +size 2269718 diff --git a/eval-results/arc_challenge/25/ckpt_141/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1c6afd160acd63b25dfa4016ee6cd2a49693ca9 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0845ad63252088924c303c8d5e711f8f72cfe385c3afbed1abdefd633a7b2e +size 2716 diff --git a/eval-results/arc_challenge/25/ckpt_144/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_144/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2798f188cf068f9836cca3c452ee16ca60bca32 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_144/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b07dab601b944fb0534c9b2c32c7d97fd3df8d790423538da7d016dd01cd9f8 +size 2269733 diff --git a/eval-results/arc_challenge/25/ckpt_144/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c00f8d91aa1cc54f587fa8e16faa8c64ef77fb1d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec3bfb692e2c55cd4b66db771877043b176520088908bbc48cff97b5da77c13 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_147/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_147/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..102ff22761ea84637cb7161bfe620988faea09bc --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_147/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14bd68cab018acd05ad88e85e6c2aab7d4da102b9931da761f820e782ca72666 +size 2269532 diff --git a/eval-results/arc_challenge/25/ckpt_147/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..647a89c5d3ab1c87eecffe2df1eb04a2f0bd209b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcab5ca77aec876cf8e37d77485c7fd951bd5fe2ad4814fe5f358746fe001a97 +size 2715 diff --git a/eval-results/arc_challenge/25/ckpt_150/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_150/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6322be4362398ea809eb3cad1b6cf9b2bd6585a7 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_150/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440b337a6acc355b5db4116c6e8b6afa2bdb45d78027e23f030e6bb3155379b6 +size 2269484 diff --git a/eval-results/arc_challenge/25/ckpt_150/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5005f6ff588ce4bde56441f39d70a365d5ea0ae --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b87847c23d6381131f2542e82656968b6e2f4c68bd493968ca6abc1965ae460 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_153/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_153/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28ccfcc963255df491de91c3770d0035e3d9f2ba --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_153/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbee07875e97cae62ed8c76626902bc2be25d44fbcadff8974b8ac49ebe974fe +size 2269866 diff --git a/eval-results/arc_challenge/25/ckpt_153/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b39d784c5e30432a71bdcc50fe428b109ca6f20 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36eb38a083bea4b60d3a7b0df06c796d95e476381116cbd0c4280751ee2beabe +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_156/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_156/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10b151dcbcf8baffacdd7dbcdcdcd09e9bdf10c7 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_156/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56385c0bf14e6b8bd3b640a23f322b94e04caf52177c01d6133bcc2a561cadd3 +size 2269839 diff --git a/eval-results/arc_challenge/25/ckpt_156/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d94875760464eb6d3068d3b6d8717cb91cc70000 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801381f7afaa798d6970e74be83994a4870450f6dfe43bc10914f33fdc1c7ccf +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_159/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_159/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d545a0f955874df261777dea10b9654b58992db0 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_159/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22fc6083905017c9d5995ab9bcb99374ee5784ac15bf14e41ce734652d179ffb +size 2269609 diff --git a/eval-results/arc_challenge/25/ckpt_159/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8240b81bc09395c0619cf2704b51997cee3c928 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b7db97a401bb043f9a6cfd90757886b75d0f047b579c86fed3715278bb3fc4 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_162/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_162/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51e22f841a1b8f793a7f11f5a9e2faa28be46b81 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_162/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc313c0cccfe72611537b5c6cb54ce75324167f3ff9acc90fa25d3841e17df1 +size 2269750 diff --git a/eval-results/arc_challenge/25/ckpt_162/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce85f0a86a81a7950d895eb2db11cda2be81c472 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a49c8d416e6b73bb346a5691efda1e53ff3924f20599d7d28e412a66362c5c +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_165/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_165/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8df81e49e96d60274751c7770308b5200de6bd50 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_165/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d68152f6fc1113a67be07a91765d7970f951a7beaaeb2b116bc6e2172afd925 +size 2269740 diff --git a/eval-results/arc_challenge/25/ckpt_165/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42cd94d6b0d0dc1cd88da008eaddfe694ae24da1 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fcadaaedc36fddcea7a8ef90e70fcfa14a48334a7bb1f1a86c064d121e08b23 +size 2715 diff --git a/eval-results/arc_challenge/25/ckpt_168/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_168/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38e1cd3dbe04e8373338764b37f4cc6882dcd5c2 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_168/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0705bfd5dec73816cdcd93475488fea5e768be2d72a99d3b1c37e3c6d1b7a47a +size 2269681 diff --git a/eval-results/arc_challenge/25/ckpt_168/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63a9bbbb7e7af4debbc7ae84b8e821fc681339f2 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8334bc2c6046c1cdef0828899243c62b74c206d5b8c97b0d6941c72b677fcde3 +size 2747 diff --git a/eval-results/arc_challenge/25/ckpt_171/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_171/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afdaf933f58060c389de29efcfe630ccbebd8f3e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_171/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a18d319ec55b0d14e990a0e4d09ae2224fdc6a3857922d122a0b83f21aeeb3 +size 2269474 diff --git a/eval-results/arc_challenge/25/ckpt_171/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..053a221cc3e4d2f9a91b00cb19072b138ddfaf4c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0df65ce289e26afdfc1e91b617a65ee564b7d23f9b69f430268972d727f8608 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_174/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_174/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ce7917ec6555e367bfc1ed1141b23caf5eae984 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_174/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7191bb76c713f25953444e705426571f028d288ffebe5464510e478d617ceda6 +size 2269478 diff --git a/eval-results/arc_challenge/25/ckpt_174/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94b35b6b08b4dafb22a2ffd7c23e74e8d7fd77b2 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d790dc35fe10a61e2212984f2cf6d6a4b247c3f7ba4383f9b82fc135fc4e0e0 +size 2746 diff --git a/eval-results/arc_challenge/25/ckpt_177/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_177/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..821deb262cb3f0cc0cc811525e6a053cba655b89 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_177/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b19ec01d6a5d37dbb866236b4a490d40b9bb905667112f66f5c11751dd12068 +size 2269629 diff --git a/eval-results/arc_challenge/25/ckpt_177/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f871584657b8f6fc2f350b7b91f4d1732a7416f0 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66ec9441b2b2f99d6f4fd64e388a8cf09e715f60d99c6e7ecb7cb65beca78a7e +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_180/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_180/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06f38f9d59138f92bab082ac7637796055ad5388 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_180/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5d1151d6ffdae7fa85015a21ee6a002b642cfc1a647e15ef8d1ad09ec3ba60 +size 2269659 diff --git a/eval-results/arc_challenge/25/ckpt_180/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a889fbabc1ac3f48cb0c9bd81c9eb60cdb784533 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd55f23df017d293e7eaef5a6c750a674c0f60260ca0b45875221d8cf6211ed +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_183/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_183/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a17fb99738c203c78af349f26e895bcdba844908 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_183/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502b215fb6c97bada9528b878d92c205a89070128e6e4ebc76ba88902e3bfe75 +size 2269734 diff --git a/eval-results/arc_challenge/25/ckpt_183/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecae46638e830aa8716f69f5972b1a4b2f89a2f5 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a269cafa732e7083fe66ac755083ea4df29568fb0e44160074d6ff5c234327 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_186/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_186/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14ecfdf2e7e75fec861bb6a2afd6e0557ba43091 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_186/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e206bb0525117cb560b5c3de805258523639a1147059950d3684a858ce9a22d +size 2269604 diff --git a/eval-results/arc_challenge/25/ckpt_186/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93f6ec5ac3b87055ea239aff1a5ce6b7791bb9ee --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6cbe434ff791e59befdc4bf683b00f1fae0eb35e4f0f5125c314cfc81a3145c +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_189/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_189/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9edf1a9abe9252f53c03d0bdacdd615f0d610e0 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_189/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ce3d3b76b88a80b4f010458521d2feaae9fc860bb477863aac5dbeec03a9fe +size 2269469 diff --git a/eval-results/arc_challenge/25/ckpt_189/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0839ec37dabe1aaf41a271b1c00fd1440b76c0ac --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1090d4847b083320b7f04aa541c6e56e33f8509650ba3797cf49f561898209e4 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_192/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_192/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3a8cacee1ff38f4c2b82d4904eb558db2b78f1c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_192/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eabe552b330a73ffa7e04d5efb0ec47455c17f1110bd2c390edf2a67789c4686 +size 2269745 diff --git a/eval-results/arc_challenge/25/ckpt_192/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8492ff36d3604137a6d9601d7872846e9e8007d9 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7beb9e53f147100f48881f7565e3f6aee48494881d1939a85ec75ad55a9d891f +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_195/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_195/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4576cc75a2d04903829bdfb0a59508254c4e8c54 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_195/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7616b8f0154866ac17ae8b87bd84432ed4550addf3e2418f3c12c43299b68261 +size 2269787 diff --git a/eval-results/arc_challenge/25/ckpt_195/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb0a0a18307a67f526a1371bbd3a1cd2ec1e2b56 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11304c7d99f7c3a167daee922f1550c7a5508d345123928b671841ba0492da46 +size 2713 diff --git a/eval-results/arc_challenge/25/ckpt_198/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_198/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c60c8a17cc3ed20f7858ad2a2a3c9b635c78f0c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_198/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d79d85c948e9042c11958c69817d969cd8dcf6703216d610b411957ce990523 +size 2269598 diff --git a/eval-results/arc_challenge/25/ckpt_198/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02b17f5a852738b9c1e637528ed6b267e3f23718 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102d2ed42a2212973351bc25de895948f8214f8b4263c4d039eee0c050abed13 +size 2715 diff --git a/eval-results/arc_challenge/25/ckpt_201/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_201/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36f5df6299aaef3caed80dab46df44a99dc8ebc8 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_201/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f586543b0ed7bf82c311ce0c2ad20678f2297b27ccd8249b00a84f7bf9885d3 +size 2269476 diff --git a/eval-results/arc_challenge/25/ckpt_201/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5611b7f9a54b4a18dd194b7ec1d3375faf6236b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57775bf3847d4c60840422f48edb2c2eda649c08faae6a25d912b0d3c3b8da1b +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_204/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_204/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f60eed522ce68fa63eea93b144d270ca0aa82f6 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_204/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd34349944d0cdfef75f2b98a95c75eb7f868edce47d0cadde532171ca300a63 +size 2269670 diff --git a/eval-results/arc_challenge/25/ckpt_204/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc25ee4dea794a9c37e132657903c190bf2fc5b2 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712fa750d9d7c093ec1373662dbe9516144d34693022256a6df4d9a33b283740 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_207/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_207/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82b6c6f32a9022c468094bf3f4cf61604bb24e35 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_207/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd03973953f356c56f595cac30d3fba58058dc9db5d688a279e0b21d8e9c06dc +size 2269401 diff --git a/eval-results/arc_challenge/25/ckpt_207/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5af5b17665cadeaa4592f73cf8d9faff330ef91a --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d51804310d733e5901aed8621223f117c7fa8666ea93cb75bafa8ccf5b8825 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_210/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_210/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fea47cf42062b369e5db2ce3f3d636a3a73cf676 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_210/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d462c56f4fbeac86b811e2463f9a5343f2bd169ea2fb84d00bcab6f3f9d0509 +size 2269655 diff --git a/eval-results/arc_challenge/25/ckpt_210/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b3aff03c3c4fa7d7fb24e8583f9b04fe29e3f0f --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0470c657f4d9fe422920fefea2d4c5934dfe8864ed39a392dc3324097d48b722 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_213/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_213/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea0c0a4bbc8d50f318deb1dcf0381eec59e407af --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_213/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:579abef41f2dc642d77d184c08356cd95fe7ee70c70b4bbf5a031ad461b80cfb +size 2269738 diff --git a/eval-results/arc_challenge/25/ckpt_213/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50fff5067a0fa0d9073bbe87ba678fc97dcbec43 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384d14071944c45b797d8f0c91ba6824eb998d179e553aa4b349de35aa23f881 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_216/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_216/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2b1f67fe695b7516764e1884b7281c2b7ef85d8 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_216/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0d02aa6c0afdfc52fee59443a3bca1580b352a77d826458e20f1e2b564499d +size 2269628 diff --git a/eval-results/arc_challenge/25/ckpt_216/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5162301072c50416aeaba97b3f3904d5e56b7ec5 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd9e4c9e87fefcba8f4b3ef0d6455a213fc1d8d33142004c35c1a458785bc539 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_219/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_219/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0e073719954f737bfb372432b918a3502adadf9 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_219/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05f2341d7c32a01c806eeefba46ef9969cbae6bd63e42c6f83258e86d1e0eef +size 2269547 diff --git a/eval-results/arc_challenge/25/ckpt_219/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aed52ff74ec58fd2b868bee7b3c5c17b36ba6b95 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf08f2fb79677b2b8ef456d2438a10717a3afe27b3511d7002d7472016384900 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_222/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_222/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a93bc6e7ef7f3b44a70acd891ec4a091412ff38 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_222/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:582283ad062e9686a7efcba59f1416d308696c8da1951a88e5142897d5060657 +size 2269854 diff --git a/eval-results/arc_challenge/25/ckpt_222/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bd354b054abbef8eda7801b2da526d97152a250 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d9ecbac6dd88b94c38af03c9244d34ae8dbf9699627252198b1f10bda478678 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_225/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_225/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..550dd014a06b6bf0a0197c5c852fc7e7672d8a27 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_225/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebe9c83cb01b14f56bec888bf8760f404cc9db05d47bc4b578896affef4d2f33 +size 2269494 diff --git a/eval-results/arc_challenge/25/ckpt_225/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e06abf22ebf50e92b41330cb54bcfacafd1613a1 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f66a4b498b748153d21f3cc396633e834097111635645d185673b693e5eca8e +size 2716 diff --git a/eval-results/arc_challenge/25/ckpt_228/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_228/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..743f8a55b5f36632f1ba75d8b354b9213020bb18 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_228/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65eb5b7bf2891c2283ced5d9b7264407247b7357c3f1cd90cbdfb39155e77417 +size 2269690 diff --git a/eval-results/arc_challenge/25/ckpt_228/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e9cc99bc06d1bba819e40068895ede1a27e4600 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebe4380813dbb92bafc8333427606c07ad5f46dd7a5630c2c91feecef426c08d +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_231/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_231/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b34d0174989e0531938e4d92357903abc5d8b439 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_231/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6da6f1a93a6d5530fddec77522b1e00f35ef270797a275b5a81696ade78906 +size 2269762 diff --git a/eval-results/arc_challenge/25/ckpt_231/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bc200401c55bb9c6f32953ea22a6e5d15192947 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4998fb670f8d874f4dbc86641c6f9460f3900792e0321352120219d607339ced +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_234/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_234/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6c7d596007edf3746cc8f987ad5a91c23d36eb7 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_234/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a196b6fef19dfc140556fbf12705fa663f9299b774aaa8abfdce6bf47739041 +size 2269728 diff --git a/eval-results/arc_challenge/25/ckpt_234/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61cf583229ac528db6c4607d087af4ab91f81152 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6944a0409d4086daa8451d97a5a9a446bd2322dcb2cd4452a0029ff7c52c5b +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_237/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_237/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2b087a02de93d8702f56792d29f0f43fe80c0da --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_237/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0003f0494cbade19134409ececdbecea31bbeffa8518e1c9029205a2b0f1fb3 +size 2269741 diff --git a/eval-results/arc_challenge/25/ckpt_237/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d398b8cd222257e67f181d41b37440423358d2c3 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2096e42154620dbefb1b8dbae30ce7685a6a481bb1508d3d680dbc862024c9f9 +size 2713 diff --git a/eval-results/arc_challenge/25/ckpt_240/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_240/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3ba625c25495084b10fc4bce7c57a75e4f9db45 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_240/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c30a947584935d7d54c3e36bbd57f0a985b81fc3fe2359d3caef7194489fd3f6 +size 2269547 diff --git a/eval-results/arc_challenge/25/ckpt_240/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbead6c7bebe6a53d1b37e2f209145197e915c89 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a62ddb0245f824fceaaccb5222d862b0c908a1af24ec22c959de30e1612d7ae +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_243/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_243/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9cee2a04906f7796828f01d20bb9fef99087550 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_243/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8303710df0f49668bb1f32b3f7cf0d15b5f43fd76e2d0cca9ac23895da32475 +size 2269683 diff --git a/eval-results/arc_challenge/25/ckpt_243/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c4275d5cdfaf9a41ce7aae0dad9ffee1c042384 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29e46deebff69cfd3083d06ead61bc69836fa260cb58e23c221cdf46fdb3f1d +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_246/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_246/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c827cbb7e252395e58a13bbe32fd33788c3df4e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_246/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b12303e8b5f8f8e0e4fbe3e0a6ba746b0695a3ae3616ef79d422534f80bb4b5 +size 2269713 diff --git a/eval-results/arc_challenge/25/ckpt_246/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..073eefa529b8b0eac93aac77a1bbeefb64583b1c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:436aa35f2085ce5fc7e7409c5b0e6ffa79b8ba4b2cd385368f6eb78613127761 +size 2747 diff --git a/eval-results/arc_challenge/25/ckpt_249/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_249/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18e24085e772d8dac5345aed90430e92818f01f6 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_249/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46bdc0096f165f73e4559e57b30fe1d344a769f0fcf40ff5747f1f8988de60e0 +size 2269521 diff --git a/eval-results/arc_challenge/25/ckpt_249/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67d7781af9fde85d124f11dc733bd783681045a3 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508f854d146d44b2ff8af9cbed5d5d79ac45ccc5627bfc954df7c8a7f45ad278 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_252/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_252/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d184c58aff1eb76995280fa26fa3777cf1f426c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_252/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957ccc6b3919c8c6023c8b0dbbe5480359b43d8ca2e44fcb4d8be4b5b45f68c4 +size 2269558 diff --git a/eval-results/arc_challenge/25/ckpt_252/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9701491490ef948d942b0bbb701487f52213d756 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4691b144a7e44809e03fd7bedd0e1225de9959773001c72a9c1531f5c4fc361d +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_255/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_255/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..798d2b768660d02257da0a62d89e775815aa636d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_255/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc02b53a9fdbf1442bb875ffe38af05c4a4b617dfb7839b67c04508a5188eaac +size 2269678 diff --git a/eval-results/arc_challenge/25/ckpt_255/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e2a3ec0d8c985c34947547d809887a298beb338 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ef569dc26f648b204428e4bc8ad39c0ef140ae30bec410c0fcfe1c78b3cdc8 +size 2714 diff --git a/eval-results/arc_challenge/25/ckpt_258/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_258/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4a9b8a5b3d6b0f0309f7908a140c4e4e25d28fb --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_258/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99cfacfcb2c12a39ce0395c987d68ca0b0bcd0df4c6b3c5472791f0dd823824a +size 2269869 diff --git a/eval-results/arc_challenge/25/ckpt_258/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4830fa3a0669a389687b758ca5127c3074195a42 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2191ec5ea7e263214396ffad08f787223f18154212930103d0403e9faf86a1a +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_261/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_261/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51591a93a32b72c99ceddbceccd4862d6a487a5b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_261/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c514fce86adeeca3204c8ba89b4e3e82a94ea63c2c2b04eb6e7e8922f5a6045 +size 2269620 diff --git a/eval-results/arc_challenge/25/ckpt_261/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e651a3665fdac73a75ee11c361b4e249cbd3557 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d3924b1169517d34876710a29d2a17786469913bdea30385c2f47b86ff47d9 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_264/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_264/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..585de58bbc31043fc9408d6c69f6877bf0bd3277 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_264/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67e313e6700883e26b8211920ac96e69c0e1c6be0fd4be1689b58ce60bb5acf +size 2269710 diff --git a/eval-results/arc_challenge/25/ckpt_264/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8677e52846269d350abe692c988b5a3a1d6e20d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5e5e895a6c29a5a6e967a75a0dd04827fe2962bf33366ae7be000b79ab90d56 +size 2715 diff --git a/eval-results/arc_challenge/25/ckpt_267/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_267/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2004fa667eb87b3f30ba5e585e3da7e3d4603bfe --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_267/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f495905577dd1c184dda461cf454f4e113e34a4575fc52ae2498aa2e3d2d32d +size 2269742 diff --git a/eval-results/arc_challenge/25/ckpt_267/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43953573c986fe820bee85fed19a2d0a14e7081b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda2d4e60a7306936bcbd151ba7e10f9c287643b5bfe9c1a22377c6d38618a3b +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_270/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_270/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..385faad55703bdf9265f1a7e1a7e7773691e9a80 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_270/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffb14b6b1c38a8eef5f1712dc6c222aa999db4ec87bd9185b7417c286e92431 +size 2269710 diff --git a/eval-results/arc_challenge/25/ckpt_270/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6bcea81756c6b8401d7eff8bbd89581f709a30f --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de24aa4388f23f0efe0cc4e6c022284635a744b8ad93d7b510c97c10da1238a8 +size 2749 diff --git a/eval-results/arc_challenge/25/ckpt_273/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_273/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca5d0206b61738088432461a66563fe24bf10c16 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_273/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757a0fef931622331d2e8ae9ce3590c2a2ca357ba5eff1449a48bfdace17f80e +size 2269822 diff --git a/eval-results/arc_challenge/25/ckpt_273/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e6efc2ed31c7e2987cd846a029d18b2c43138b5 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ef71803de4c50923fd518729fa0a2ebc40f672224ded0541cc06083b91fcae +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_276/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_276/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..178a80c94d9ca418b39fcddb743b7d17117dd2ab --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_276/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e008d64caf90f5a2a5f83dadda054706ae6a64886305d06678b2f681e0d46e +size 2269828 diff --git a/eval-results/arc_challenge/25/ckpt_276/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3000dcb838c3a9784fc42cc4b2242284ca57bcc --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d690f26e31a013df2944551f4d8ca4f726e475e3bd161e2cdc240a3882b008e0 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_279/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_279/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cab0f5a248929e2326c4a01589357f414799e41 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_279/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:258935c697f2c36d5341131aad31d4f9321f3361aa39e4aa1345e396163379ad +size 2269828 diff --git a/eval-results/arc_challenge/25/ckpt_279/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e624bcce91db9db9cbcc1211c685dd5b4a30a5e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572e2b2b0b28323449c8c5ed2f3d67a4608d695fcf8f1d4509bc6dcce5e22e9b +size 2716 diff --git a/eval-results/arc_challenge/25/ckpt_282/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_282/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94e1f8dbad1a0832e4c3b7bbd2aab5d35f2a167d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_282/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87848689a49bc1cccdacf46978ea1560949445f10f8041bd5c973e49d970cd5e +size 2269845 diff --git a/eval-results/arc_challenge/25/ckpt_282/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d72b856ec5e94ff3130f413442cc879f026083f --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0419862c82b820f6d156377f7775fe786f4b633105d3dd6a9bd09c409ccdfdb0 +size 2747 diff --git a/eval-results/arc_challenge/25/ckpt_285/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_285/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6687e7a0e63d4cac93176e24c4db220819860d99 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_285/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e46149840b05bd5badaa449841a77e864344a2622715cd67a3cbdc011da900a +size 2269642 diff --git a/eval-results/arc_challenge/25/ckpt_285/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f210bdaa6286a93d144aeaf901f2844f1d75906 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c28b4da599a27689dd323880983e059b5568f749b3522148c47256814c0b31eb +size 2746 diff --git a/eval-results/arc_challenge/25/ckpt_288/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_288/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5b6892920ca5859300adef5b3d836577016bdeb --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_288/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930d565cb2558a1b92867ef5e28aef3e353ec5ca77aaf6770ea60195d7ef2eb4 +size 2269630 diff --git a/eval-results/arc_challenge/25/ckpt_288/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c290476ebaa9fcfa9732e7781d8abb38fff92c2 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864493b37402131d9622c474e261ebe0a18eccf6064ee81725df33017949a007 +size 2715 diff --git a/eval-results/arc_challenge/25/ckpt_291/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_291/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b324ca307ceb47a4b1abc579ecd4ff0e71889cba --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_291/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:249b6fcb88a625b0f0652d41bb4669b93d32823ca9ff5fe50516e1ad4f83aa6a +size 2269612 diff --git a/eval-results/arc_challenge/25/ckpt_291/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd14fe6b91365fb66142ebe5d2f8b78b6a30a3b2 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83022e64e216a0f0252fdd867f7a8aa968a337f3d10154cb246c822b5c7e52f3 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_294/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_294/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..faddcc69a397c82c8fbedcf3013746fede2ebc38 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_294/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4f64bce5e6ac824401334fe92082cddfea4cb1ff0bf49ba6906ce43a879b818 +size 2269518 diff --git a/eval-results/arc_challenge/25/ckpt_294/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25b293516946af16f4039cb39f623d6d50765ac2 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8457c6fa08da6ae5cc17c84d275e59ffdd4aff561f9afc78043d7eb3e63fc867 +size 2716 diff --git a/eval-results/arc_challenge/25/ckpt_297/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_297/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23c707605f092299e2a98c294b28d35a9126ed8e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_297/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd78a6dbcb3e00d00080d7519cc7c0c12b61b886d5f7d8aa94a5a16b9ce5913 +size 2269739 diff --git a/eval-results/arc_challenge/25/ckpt_297/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5229856813432b0568f8ec4e1f4fe1db856157ab --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b1fe5d5007e182fb2e97ac8ade0ad6589d38e0bc087a71a388ff966aeb45403 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_300/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_300/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6dcd5446da4a9593ac65def1e1ffbb0ce181fd9b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_300/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22bf01deb1652e4ea1f8fa1897996a6e035ffc2cbd8fdab387f0126df8c7c171 +size 2269670 diff --git a/eval-results/arc_challenge/25/ckpt_300/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4696210a4ba9de1d5eee439960cb8b38aa25959 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1b237b81214bfbea0696eb18f13ad83e7994e99b40e1a77c842a3c1315543f +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_303/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_303/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21227ad5c0811c17dc9e1aba395cb6b4e53aa038 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_303/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d713ba27f475fa67f9b90389004efb7c8ca3bdacd1bfcdfefbfb8b608c9e1203 +size 2269895 diff --git a/eval-results/arc_challenge/25/ckpt_303/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad6d113fa994dd30afb7e2832dc1c0787113b49b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a74542a01a6f0ea816e4595e050ffd539bd28a4d110d414ab7a5e8fea831f77d +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_306/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_306/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19c775d0f3ef296912611e02b5f76fb245be424e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_306/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:530f47f8d68ab65ef0305b399d2e1c928ac9f90365f0fea8b9f3c00dc3d950ff +size 2269637 diff --git a/eval-results/arc_challenge/25/ckpt_306/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06f90eb099aca6924ba844efc258294b6051982e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f1715604f0c98f9150a48df5e17c3799d9a34291e5cc5e2072f522d0c1454d +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_309/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_309/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..047e5e53b15b54ba94e4f1c3459b1a9ef58df0a9 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_309/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26cf239c8634ee0ba75120e59efccf71826b5f1d923e5e11ce1813515148239 +size 2269602 diff --git a/eval-results/arc_challenge/25/ckpt_309/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e3ddab80a48de368dc683375b22deaa04f1cbe7 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b390b7125f789ce63694f76abd4e03659b64dbac99a38adc7d19a52f9673590 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_312/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_312/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67599af6c37a014556ef66c6eb76e18e3d7f7393 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_312/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5855aa6f189695eafbdbf447b84adb26da3048363098a44fd981b646ed7bc9 +size 2269519 diff --git a/eval-results/arc_challenge/25/ckpt_312/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..887a9ce87fe45534b6620de51dc7538c264d9402 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d8001eb4816eeba95df3757fd89a8dca15fb07e251c61f281fbd1a61e3bd46 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_315/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_315/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57a9ff752b8b7521b1a2fb7b4311e9f445f2b3b4 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_315/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d2c3f75d3a1c36ffd7024897462eac684aebb596f9907211faaf318fa3d95d +size 2269546 diff --git a/eval-results/arc_challenge/25/ckpt_315/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1629d20c7c4c2bb2df1c972e5358466f1001203b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1538621ce365613b49de11b08612f9a4a6777bba70bd5b635472e86d17b2b8 +size 2718 diff --git a/eval-results/arc_challenge/25/ckpt_318/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_318/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b1bec003cf4e10a886e887940d27d48f1968ff8 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_318/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abfd9572164fcb5958048a0381a4c0a95d6b141d966f06ebdd21c54c64f5c9c2 +size 2269650 diff --git a/eval-results/arc_challenge/25/ckpt_318/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e97acc251c2a15459a22683fc361add3d02213ce --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bba2adcd29fb3fed547687e5ca64e6ec292942b738cc85cc1d0d8df11a0c201 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_321/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_321/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa85f34c104305f31793d280611fc7760e38e91a --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_321/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2105d6bbc2939fab882457abc7b5c4ecdbe3588f14d6d0c7ca87dbf9c8686d +size 2269575 diff --git a/eval-results/arc_challenge/25/ckpt_321/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ace5e09d20265260ed3b9e6a1874ab2ec1db98e --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5650ad9cdb3e8aec187562f89a1ee8df7774c119f39a91d9a6ab96deeabeec4c +size 2723 diff --git a/eval-results/arc_challenge/25/ckpt_324/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_324/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e259669b82830c72fafe8e3708ad5ec40a89af49 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_324/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10abd0b697503e3bde241fafcd4193721b3943c6d15ee455bc7bb9d795a43702 +size 2269602 diff --git a/eval-results/arc_challenge/25/ckpt_324/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83021554bd376489d205a6ee08bf77ebce6efe33 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a594775ae85b64ef5942035d28da4e152f890cebe710fd73595d546352bdc046 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_327/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_327/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ee21fdb941d9061efbd42e197c78282e9905995 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_327/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e772a9e47419e0a25f9319e0e8e315a154b0e4357ab0af5829118ef25c3ef9a6 +size 2269796 diff --git a/eval-results/arc_challenge/25/ckpt_327/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93e5f2f8405bf4418380b450ff98d06f911d49bb --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afe48ffd7564d3702b705e3be313d32a3f11bacb9b4f23241f94b6ecdcdfd0b4 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_330/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_330/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bd3ad83fa2210f788b37db7549e5b0ba101c11b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_330/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78b78fde9782814e1a27d00aab7853a61950118060d268fa9692fb07564ec7d1 +size 2269727 diff --git a/eval-results/arc_challenge/25/ckpt_330/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e15997c30906df387c34eb9ab373dcd70739db38 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2f612d697ee3d8c1b2c8e1722965557dc19e28b1b24781ba7ed54f72c24a86 +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_333/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_333/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d14f27615f3b9ba62979c199b268010907e3bda6 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_333/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55cd9ba89b335760114a58d775c32f9a161de261091bf414ff152f96a8e305e1 +size 2269550 diff --git a/eval-results/arc_challenge/25/ckpt_333/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a237af0d1a1933c4ecaadf273625a20c042c15a --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31c08998cdf5a4779086c1da4a601408f83403f84481abbea64beca342887e1 +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_336/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_336/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0bbff3b8dd6a4e88d22e26d2be53368719d0b13 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_336/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ff3056c6a1d9f718de96a4a5b960ca9d15d34c0fc02e2da58996a753f81d12 +size 2269701 diff --git a/eval-results/arc_challenge/25/ckpt_336/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d83e52ff4edd782c591522baf788dd899068af0c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61c3c7ff3d217eaa03b93850d9c91ed446cb548e479f31b1137ef11c1264be29 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_339/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_339/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6401fcec44d07b13e9f1aed2fa5528fdcc49bc5 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_339/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2977372b2fd166dabe83dafc513b36d2a2b98d5be6846aedcf7fdaffdd6e8849 +size 2269688 diff --git a/eval-results/arc_challenge/25/ckpt_339/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d1f5b07c2a1b45e9400a195120455927018bcc9 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34694606ea8021f8f122050bb91b30f84bd76f34a16afdcfe8d93a69a1fe972d +size 2717 diff --git a/eval-results/arc_challenge/25/ckpt_342/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_342/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c07f04df6dfdbe969f0c52a46523169a1e51111a --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_342/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f70b2282bd14fdbed11cd7750e1381f63c3794b1ebfa7228e72161722a98312 +size 2269618 diff --git a/eval-results/arc_challenge/25/ckpt_342/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a1f7c7dd19f04f507adf8504b57a64a51528134 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e5613d0cc67ed811e69d2406bf6f22ca3b089e798f59e5c90e9cf41e8e92f6 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_345/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_345/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3da54aad41e11d6c3a0db3dfbca49e09f0577f08 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_345/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555be24fc8dcd3c645f4bafbc03c67975df934449e7b2687d0d7dfbe930ac53a +size 2269620 diff --git a/eval-results/arc_challenge/25/ckpt_345/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e4cc00d9e9f59fde2641a318f6e227e8913db10 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c47d8c57e43e93dd3e5287b313871a5a82285993f067eba5bb85d97b9971d9 +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_348/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_348/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af0c3fe766611fb204daca3fc640259a4f3f372c --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_348/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2317abd0c1a46e49f7d7b7d031c0759b24f75575469cf8ed9a71ea2e9a657357 +size 2269528 diff --git a/eval-results/arc_challenge/25/ckpt_348/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9b2f8447713d8a3a09e25fd214eb540f5221563 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94401ca86f1ba622c72b60ebee4eda55139940e40b35c4114c6f037359035621 +size 2715 diff --git a/eval-results/arc_challenge/25/ckpt_351/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_351/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9acc366519838f32d1f64490f1ed854753e4b345 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_351/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7f0e34950fb4990846db4cd8248cdc63626a61bed51dbc02e464f98b569462 +size 2269732 diff --git a/eval-results/arc_challenge/25/ckpt_351/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb61ab2b585ae2020c482a09ca95680b610e3474 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff308ca9602c4500d8f587918d8e8d71d5dbc19e2892a474fed81132d001def +size 2720 diff --git a/eval-results/arc_challenge/25/ckpt_354/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_354/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..034ae615a9b8a32f555eddd8b3a006e44abd298b --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_354/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7e6857a5af01c908b2f341bf67f5163007e8e14560a7f64f773e2d4802163e +size 2269708 diff --git a/eval-results/arc_challenge/25/ckpt_354/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15b67122eff50119a0a1df49228fc57ba0809d2f --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d565ad97638f5fb92dfbfcf5cda467e36deaa4e72bbce852a46a1fdac34c7d69 +size 2719 diff --git a/eval-results/arc_challenge/25/ckpt_357/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_357/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..354ea190a9d82e7c972554ede49cfffcfe93b641 --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_357/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d3e760b07025d5d4563d0bbf63c485b8cafdf0ac4f98ec3cd15e628a18d46f +size 2269806 diff --git a/eval-results/arc_challenge/25/ckpt_357/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c10d46ec190f1f20b7a0344727d75adb28934e7a --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff259b3333abed0165db429ae69e72c8fec0e4232d6d34da2007e874b8e5f02e +size 2721 diff --git a/eval-results/arc_challenge/25/ckpt_360/arc_challenge.jsonl.tar.gz b/eval-results/arc_challenge/25/ckpt_360/arc_challenge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c797b084111ed70ba6f4ad389af27e349318875d --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_360/arc_challenge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac0da285b392f53602272c550db8d9f5e56b096c1162c51cab146f4e17071e4 +size 2269557 diff --git a/eval-results/arc_challenge/25/ckpt_360/results.json.tar.gz b/eval-results/arc_challenge/25/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..021af3227ef3f0a440b287c201759722ad9619cf --- /dev/null +++ b/eval-results/arc_challenge/25/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91609d83559cef88e8c11feec60d18844efea0ee467701153649f66fd84e7178 +size 2720 diff --git a/eval-results/arc_easy/5/ckpt_003/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_003/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37b771db1c8bf32ceb80ae67521a9329e4e64565 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_003/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1f1a6bbbc819d663f2da56cbcf21c2fb9d2cb22103b632086d2f8df40083e3c +size 1329295 diff --git a/eval-results/arc_easy/5/ckpt_003/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b4e13219569fcf3acbbc40fd5afc3335f9b5de7 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b12839c5a4672d634a1f22cbcd86e47010ad074c7d45695332e73f531cbdeb +size 2708 diff --git a/eval-results/arc_easy/5/ckpt_006/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_006/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c72a99a2ae2b45281a9ddfd559c28080529cd56 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_006/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffe6500b1be1a3a3ecbab12936876a3f3214dfdda5e516dfe6cf4d87ea2b56e +size 1330064 diff --git a/eval-results/arc_easy/5/ckpt_006/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..491304a9ae905df98496ed27e1918225504526d4 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb0e9f2e2104c4104d71142c412c2b6117c1e2e583cc3b553b135b32f0f3f16 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_009/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_009/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67b2b14d1f898a6af527d35d0a946088f8949928 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_009/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:648e74fb6b97ab70c1919ea9cfa49f051e710db780a8d2bdafffb4b5e206fc09 +size 1330257 diff --git a/eval-results/arc_easy/5/ckpt_009/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0289e87c42794b6bca3e393ab87045a230974b9b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193f88b2cdf9ce0df1d5725e0a48767d4f2e5df94b70f45b6ccc366023b8f58e +size 2737 diff --git a/eval-results/arc_easy/5/ckpt_012/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_012/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3511ba15eeda3e092e9e4beb1366759961036b66 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_012/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1528ad226298a44abfcb98ffbd0496b5a274b6052db5c144f6eb5fa2b57505b +size 1330342 diff --git a/eval-results/arc_easy/5/ckpt_012/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70573b78bd8500ecf3e8a89fa3cac44a602d6a7b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08aa76266824c5ea0f6bde87c888d20e79721d9ae5ec92e0be736c9d25866a59 +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_015/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_015/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b380f649cf837c7736eadf90a67627938f77ae81 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_015/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a749f93b556129bae8027d0ea5888387511ef3d1296e78b92b5bde71dfdf223 +size 1330470 diff --git a/eval-results/arc_easy/5/ckpt_015/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ebd6e45cba97ea48e2d6628973d72e22fa3105e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d81c2da8d87653055640f4e3104835003df507d3d462f0d9cd3c40deebd37fc +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_018/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_018/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75bf6a6092b0ddf2ae1c77f3af400857fbc58dbd --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_018/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3308a0dc591bc85ac3ceeb5265dd6812e0b39f6d14e4b37db103f719349fc896 +size 1330593 diff --git a/eval-results/arc_easy/5/ckpt_018/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c645bf36ca7857d55ca3adaa25ce8b28380ede33 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c568b5dd867b5515f8282b18124b1f8806aa2b7c4eeff9070fa998c0ccf5e82 +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_021/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_021/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d80defee40f73a59dee57f1bb303909fa98f869d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_021/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b374bad8dbe8cf4c1db8a8075f176516438366a5e8ff04296fbd499d08de6e +size 1330778 diff --git a/eval-results/arc_easy/5/ckpt_021/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9837d0e1ebfb629f42b6650e33c3aa746e448ce --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695b68f5051c636b281b32418ffc0ac583847e176b0e8af6bd795461d57224cf +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_024/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_024/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f14ab8ef80d95256ecadeaed79027c245563c72 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_024/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb335e4cd5f4a3ea310bbaf27279937383d63b5077b9997a743d43ccc3c8610 +size 1330556 diff --git a/eval-results/arc_easy/5/ckpt_024/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66cfec28fc554432ec422844178ddb0e36955b51 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f761be1a264f1332e2e44cf671c5cd2ad8bb11a66f39bc7185f226753e24de4 +size 2697 diff --git a/eval-results/arc_easy/5/ckpt_027/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_027/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a59169b9e3b3738c060b1deafbe87751b1445e94 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_027/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a43abb87ca5235b87799b12f2c790703bec781b18f8488ccfdd9c4128f9da308 +size 1330554 diff --git a/eval-results/arc_easy/5/ckpt_027/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a36eefb3a461c6c0a0ab519ff7757b0cbe529f4 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0cbe3326c54063459c7b98e635d6b98a05c72b2607e60d6b6d56299d30a687c +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_030/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_030/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c2e0571703e35b68bbd37a0d470b4ee989c998b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_030/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c10dcdbf8d3a05a192d3642023866d1a48ba63482aa1dcef238071956cfa141 +size 1330918 diff --git a/eval-results/arc_easy/5/ckpt_030/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84a34be46ec56c3c9d5db430c8491310747f375b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b6bf53f9dc7325794f1396bc9af60736d006ec809462dd77f7bab4c35897339 +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_033/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_033/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00c766533fa51f233592c4be8b2af4e2c1dc877b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_033/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a0117f1d4d917a7bb7ef876fae7b37a2d23457ee00a6ee0980139eb4f90e01 +size 1330761 diff --git a/eval-results/arc_easy/5/ckpt_033/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80230e186ffba329fa440656580bd40aefca52ed --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66784eedd34f2b80dc2ac4cbb7500e2cf38ebc1bdabb0aec0099b0e58b645320 +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_036/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_036/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e43c4b948ec57cd279644301a1d1151cd9e7e97a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_036/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fec98f36a96a9c333ccf28f9dc9750851fb44d0abf7cf814b614cd40a0974e64 +size 1330779 diff --git a/eval-results/arc_easy/5/ckpt_036/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36c5a704685e724e7ca5e966603d6284e6b82fd1 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf74e37b27798f44b1775bfe468a4e5f4f7cd690b093a8079f3db0dc84812149 +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_039/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_039/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f4b6a6fc7a429b00d11cdce4f8823dfad78803b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_039/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c462ca26ab0451921523f20bdf197a5ad7d5f0da28a700c135da7c2f47919ff +size 1330763 diff --git a/eval-results/arc_easy/5/ckpt_039/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e3cccefee397057558502d69c30d4cb2b0b10dc --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aa03cf42057051372dd31cde81989cc508791d79b76d6f68b490db8f08ed4ed +size 2710 diff --git a/eval-results/arc_easy/5/ckpt_042/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_042/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c6ede2e28d78322af0e43e0ea47600e77de7943 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_042/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e271127b84391745248fa171f70b3eb37759178d313b08ee8ffe3edf8361e8b +size 1330786 diff --git a/eval-results/arc_easy/5/ckpt_042/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fdc8e5ca7e3abc702918cbd2580a60dc52bb1bb --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab6490479272466d24fa0224b2e49a2236c72e9dd91c5706c1bacd59bb1944f +size 2708 diff --git a/eval-results/arc_easy/5/ckpt_045/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_045/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a614caf04160391fbaff32200195e87332092f72 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_045/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ee6864cb65a4bdec6df2090f321ff59d92ff220db5a752cd16a422730b1a8d +size 1330899 diff --git a/eval-results/arc_easy/5/ckpt_045/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a6757210f7f20a8395ada157bccfb88a9a92c77 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4658e6c9a5ae3198b6f3619eea421198ea83ad23be7913e851d12f0500627ea8 +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_048/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_048/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b6c74fa871a9f179e72cbaff05958af41f0bf34 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_048/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd132bb22d94cb4b9c52e7d93f806e1405b78ad745a9e392348618a2c6069459 +size 1330729 diff --git a/eval-results/arc_easy/5/ckpt_048/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42ace3c2c40b7101e6549c702b436c94ad5e84ca --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca4d53974bfd82eac23368722409f4b47e37fd1eff6c2905cf4b4d8f247c176c +size 2699 diff --git a/eval-results/arc_easy/5/ckpt_051/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_051/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bb13f90fca8348db00f9d9a85c23e7d9485b90e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_051/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0dd072fe6d7ec23afc3812b5f0bd6c78c047a94ec1d15564be2537bf872fb00 +size 1330881 diff --git a/eval-results/arc_easy/5/ckpt_051/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db592aa0cc1466fb5d35cf24aaf5f939b511032b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc81ea4a823dbdda94a9c4a43444820a419643c0cc49f271021348236a63935 +size 2735 diff --git a/eval-results/arc_easy/5/ckpt_054/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_054/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74317c4e7f730e1d0e6ec6173860d343e0492f39 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_054/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3f37168ee687345619c1d1e437ae4fa3d0ff9298936323de90ee9ad0137587 +size 1330820 diff --git a/eval-results/arc_easy/5/ckpt_054/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e35b252d9fed918979244bc2ee627d2c10b63691 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a81dcdfba02968a3de93eda0467db689579a34f67ca1bc115f153cd49ae55d +size 2735 diff --git a/eval-results/arc_easy/5/ckpt_057/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_057/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2529c95c97482516e51ce21e2417d81e863a9a6 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_057/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc87ed5ff0334ef323dd3c478858a38d0631251b35d74a8dccf23a97b75c6fcd +size 1330943 diff --git a/eval-results/arc_easy/5/ckpt_057/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8430cc2f9be279d110073efeccc413164dac210 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c23cf415c0cf6e43db98ee215634c06fab28b5c734b2a499dc288016e044560d +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_060/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_060/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5df6b55089a64f007b3c04c6f3694b87c6f5e13b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_060/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31e4e85a47e4e2bb07a687d255210d83a24ff114aaa1a310c05b01f7ddae83c +size 1330932 diff --git a/eval-results/arc_easy/5/ckpt_060/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63385211718aaddd340d70a1855d1aa8e15b9294 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8702f572dec019d9c976a315ee6c52e65c7f2724a721d971e4bbbca4808402c0 +size 2709 diff --git a/eval-results/arc_easy/5/ckpt_063/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_063/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07f7a2ca0d2a6b3c955efcb4cc3425bc75a6bc4c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_063/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c48b242bb90e6c016358ed9ec9fbe6637da555893ef3932a83dc0d6218fd04d5 +size 1330728 diff --git a/eval-results/arc_easy/5/ckpt_063/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16c624b6de1e52c3f2fbfb98e02a813cde6c577e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a9f62cb528216467e1f6369f1b13b190e6f5c2d4e44cd828ae31955f50f6063 +size 2703 diff --git a/eval-results/arc_easy/5/ckpt_066/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_066/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e20c80eed2ba689876ad3d46c48403e0012a6a39 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_066/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6bab4737c17b8392044877c3c811560604b5105c3cb645a14638b6c46173b4a +size 1331038 diff --git a/eval-results/arc_easy/5/ckpt_066/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16f3901cdb73bcb45261969b3b3c48ddf63f7b4f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba4ca684a3ad9f01c37762b42b8e6cc20b7f1db14a7f6ea0d94f4c64744efce +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_069/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_069/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..533930ac780214a43e025b397c56d4696f512ae3 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_069/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b0ee31897dc81396fd73e1291adf4904f56fd43812bb96dd85385cda16f6e5 +size 1331132 diff --git a/eval-results/arc_easy/5/ckpt_069/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c6a72c04eda3c6bd905d1a25b3a941e6488859b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0181e261c76ecf404c4720f794537a8bc3af92f5d871457e2b79329ac8aac2 +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_072/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_072/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8868c13c7dc729d8e032528698247da61d3558b5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_072/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92003b8d2e284d93099ac8d7160505e52df5e8a7d27156b34f1b2ba14f6029c +size 1330970 diff --git a/eval-results/arc_easy/5/ckpt_072/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6035894aa8b415e2ae4604b00b6b2002f1fc2335 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1593ad603b27637dcaa022a1c108e99be1f12e7b0d4851c5826410caef887f +size 2711 diff --git a/eval-results/arc_easy/5/ckpt_075/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_075/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d009646fa5580eba98ce56fcd23fb33c05017b30 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_075/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad1dfc527961548e22f61de1eb1c66833cc616550f36e04806c9f44a5f420fd0 +size 1330970 diff --git a/eval-results/arc_easy/5/ckpt_075/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41d60ae05283189dba69fe690c2082fbcba71d9d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14817aafa492c97cd45c92a94b62c5d343fcebffbe37a3470d9687410c555e8e +size 2711 diff --git a/eval-results/arc_easy/5/ckpt_078/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_078/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1562c5d6ef338a5ba17b31c53153c6ac336caac --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_078/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237ab8ba9c769db6fef3974f432c302833b783718087acd193e34ad9c028f87d +size 1330901 diff --git a/eval-results/arc_easy/5/ckpt_078/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3047eef278c541436cca7d89670a62e179d6ba26 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d190f7399f6522cc8303ba2a8d95f953d4200824859654b09f589382f75426 +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_081/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_081/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9032628a1b32a7c16f8abe24841735a392a9c66a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_081/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cec5931ea83dff6b175f5de34eea23a55fe9fe5eefdb9343e5c1952f9ef793c +size 1330960 diff --git a/eval-results/arc_easy/5/ckpt_081/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65b942b35b73a62ae2dcd2e132739d28ac14dad8 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f54c0f261c3d180a3620bf79ed65c5f02754acb6ba5911fcd49af989a2af1d4 +size 2703 diff --git a/eval-results/arc_easy/5/ckpt_084/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_084/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f2f322606addd1c1560918486b9e792b990b913 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_084/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2ac8885aa36a74b86de817c47db66d23ce310efea1d8b1b7220b5c300dafde8 +size 1330918 diff --git a/eval-results/arc_easy/5/ckpt_084/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8927b359542418b05219eaedd48ebefe20a15f28 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f320ddfcb855edd1259e77e27534ed6a5797def3fbcae1a7220e8ce3ff56f5 +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_087/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_087/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..233f76ad9ee38daf39ccbda77d4a54856f85d9c6 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_087/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e310c11f97416d86d838ceab4daa8b5ae638c10d971e49ac76af2c8aece2e894 +size 1330736 diff --git a/eval-results/arc_easy/5/ckpt_087/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6414232c5218b3bc2a40a1e527bfaaba65367126 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5bdc5259cacc2f7fb91f4c2e8c0aad06cf614f3ab52835e91a21647bd7ab14b +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_090/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_090/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f5226a56a42cb5f8bdfc56ef9d1b0edfb8860cc --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_090/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e968bdabf9ec3ba2567d9425312452df4d1a1d654845be519868cc15f2a8de5f +size 1330977 diff --git a/eval-results/arc_easy/5/ckpt_090/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae8eafd977547c2f843e486b615e8e0542a58a4d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf9031b9d12a1711f58b753861e1a5198ce4214f07e9d9654e9d4bd9978eb51 +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_093/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_093/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60bc073076b07906843fb1dd2e6f90d92b85a554 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_093/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8407193fac7cab0f9d6ae87cef1f8a23b397ffddea8968bcb8b759fa71ba1128 +size 1330959 diff --git a/eval-results/arc_easy/5/ckpt_093/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa459fcf255e1c11ddb6fa1d813151eab76469ee --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a7589379fd0f541cf361cee6ff7321b2e024e3a5a4ab6fbbde80d6e89e599a9 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_096/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_096/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e03b23db3925c860f0f63b7774e9f2dd40546dd --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_096/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080f5cda05b9e2c7665675d6c9e656a6db6ba531554e1f46c4e0c2f8fe3dcf3f +size 1330829 diff --git a/eval-results/arc_easy/5/ckpt_096/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a379b867b33e0ad71bd71e6d152245edabfb0ef --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f1ecb7d21f90371202811f18b1c6ce6b4d5fe801ba71a11c84bbe009400dc8 +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_099/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_099/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f34cb07a0934ff94873c7db171493bc9e346677b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_099/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92436a8b6e0fbdb90ee48689418125adf61384bac33ee1ad951d7e45620ea6ce +size 1331021 diff --git a/eval-results/arc_easy/5/ckpt_099/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62a157ab5f339afdd939b46704bb03c2feb37000 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e872f7b055682be4151f07f6ac59f6028a605163a9ba103344ebc58eb376332b +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_102/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_102/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddb1464cc1128fe40106976153047e365ea7d758 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_102/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a020bedbcf6bc44e2db40b8d83b2ca44a69784381ecb4f14fcb2927418a2a73 +size 1330917 diff --git a/eval-results/arc_easy/5/ckpt_102/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6648821996c3c2085cb04e2ed6b961ce71022270 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db6a054e62f36b0ddf0e85e484a8b424f4f63c10d0c1dcdc06a2d870a587703d +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_105/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_105/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..859066c0978c57a9e526a8fe43d5972b021f3b7d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_105/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eef1aefd28c01f69553f873357b19304fa733636248714798f5e2ceb036d28b5 +size 1331121 diff --git a/eval-results/arc_easy/5/ckpt_105/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c264b6e0e0bdd43f2aa2e0b30cfcea4bede3cb9a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6e168010e0a016135569549cdf6fcdf168c45178d95bb2511dd469e1d85bd8 +size 2708 diff --git a/eval-results/arc_easy/5/ckpt_108/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_108/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e50b7907094f1c8bcea64c9cc03dbd10b8404ef --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_108/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa9d8fb08a9ad1427887d3af68fbdaf96ab6e259f2b2de57dd8827a21a9810e +size 1330870 diff --git a/eval-results/arc_easy/5/ckpt_108/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a914a3a3b89ce56b6cebbb7a16c64cc70b23b32 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a498ce24191816ac7d1d2f14dfd5eeef54a0a6074ed284834757f2c5593900fd +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_111/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_111/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82ce5c268d6c607af303f4642d274ab23f4b032b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_111/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba270fbf5ef58d1d6f8682ecb3effc2a2171ae059427f983dcdd37a97709463a +size 1330746 diff --git a/eval-results/arc_easy/5/ckpt_111/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74e330ab05d15edb410a3c87c5a87575700562c4 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b68c4ea4b5350103146aee5ff4200ff6c0b2548b0ee7fdc0f9e27eede7fdaeb +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_114/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_114/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbed4fa5b95579398db4910c604dd3159f7dada5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_114/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52926db45092f9ce8cfa4c69d1215cc35cac594df67909a1204cd1a29638b94 +size 1331039 diff --git a/eval-results/arc_easy/5/ckpt_114/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44eebd4e3bc781968cf9dac48ee9ee82c81c8639 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adfb19894238fac3d8d214241d2be072e5af9545fca308a5f076fdcc2a9a314e +size 2703 diff --git a/eval-results/arc_easy/5/ckpt_117/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_117/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf956d63d28406af20b18962341a5ebc84d903e5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_117/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315acd100fc2463d0b941c1acf4ccd68e2cf48e52b8fdd5e5a7331e8d40b7525 +size 1331061 diff --git a/eval-results/arc_easy/5/ckpt_117/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31706ccd4cec629207c7fc9627f849afaf1fe16e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994b12a111db5e41f4d9fe5f7134530ff46c5bc7127e68905b9efd509ec75c08 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_120/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_120/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9820af29be3c68e16eb71d1fa300c66725cd28da --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_120/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80839900ff10635c9384af159a272c8f6a3c56133c32f13fd250e253a9808233 +size 1330885 diff --git a/eval-results/arc_easy/5/ckpt_120/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f3217a3b44febc2f80d2a8da129da9bf4639bb7 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e968530643b846ce804c561dd8558f346d3e55498d52cd4f37cf2f282692efd +size 2708 diff --git a/eval-results/arc_easy/5/ckpt_123/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_123/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d212ddc6f8be26a29a0a99ca1ba426380da77af --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_123/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1914389bd0c76a14893b6d4d84cf14dd904ca93c1563e1a8d0df71adc51637c +size 1330952 diff --git a/eval-results/arc_easy/5/ckpt_123/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a2215e0be210b96de8ab2663c70fa0a522eabb9 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e5707614b5fd0f863217354c91472c448f489050fce15904b12285d4de00b6 +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_126/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_126/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17b21fa1ce07d43cb9c730af2c36d578942dda6e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_126/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca224398ccba2d5ab7f6652f1ad29762f2f19d9d6e59e07c976ca1402fa7af88 +size 1331225 diff --git a/eval-results/arc_easy/5/ckpt_126/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cde3d3b6dfdacc6a39131ee85922044ee1a17acf --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9e2e96844e8ef64026dde46236f02f975329ba241a1916f7f961f4b1269d8a +size 2701 diff --git a/eval-results/arc_easy/5/ckpt_129/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_129/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5e2cad32971de76b31249c4d97b8ebd89ba4af9 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_129/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c336382f5baed04665435ef95f3f318bd7a56f004bbade26df6561d527a7ce +size 1331042 diff --git a/eval-results/arc_easy/5/ckpt_129/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99b5c5d9b7ee65a464c01275c82fb40b083946a5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9687ea79db5f71fae6b2e7af2940476e1536bbd5b716daee5e94a5902a01f5f +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_132/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_132/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d064e7987c856a593c222a22eaf612cf09600111 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_132/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9320566c26cdfe0c671d31f914cfb1df720575318750fafbf6c7091de45cfb59 +size 1331039 diff --git a/eval-results/arc_easy/5/ckpt_132/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e557a1dd6bfa8e8e6839aeab6db97c72bcd4bf9e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d11cedee61e33ad5efcb8ef381bb9e14c0a9e7c0983d8c32704a53e032d6ba3 +size 2709 diff --git a/eval-results/arc_easy/5/ckpt_135/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_135/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b8e616b74b945307a2a9b23023e7228249c7b4c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_135/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:029618e73c3230dc9aa3a2b6323af0b8da2933db67988a4589245fd01d426957 +size 1331003 diff --git a/eval-results/arc_easy/5/ckpt_135/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d4897a159a30f4bf03239bb58356edd3703c435 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65622287bec200c2b39c2d576fb344b42e2d91a7b6435a4b67fa255088accd79 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_138/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_138/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cce1591c4d8554c30d3daf1d089c770f9163ab26 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_138/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36cef22406531daeebe9e67a6f3d3b66ad22fb011352cf8046e114c7fb86d8b5 +size 1330914 diff --git a/eval-results/arc_easy/5/ckpt_138/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e3a9abf494aa7eca4fac79d29234fb702ef7865 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb5146300d9c52803c5c45de8b6853e4740e9b034b371f5bd1470e13acb1236 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_141/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_141/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57b2ea5dd1ef689ec54e288034298d9c22517bde --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_141/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb632ccaa2ba18c8fa309a497be3928cef0379016d4c4bb2c130a2c107455a84 +size 1330939 diff --git a/eval-results/arc_easy/5/ckpt_141/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2bc08d0bfc54b4d9b873ad12e4d56eb7f973119 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f45bba1e343f3a0382123de44fb595d7012eac4ea30eab750d4c51d62d3e364 +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_144/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_144/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0973d7047c7afdfb7308f1ba3ac555f727aa482 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_144/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8555947fcd4c68abd104edd7e274e2fb1eb041f9c7b60f30ee0730de9641d8a6 +size 1330951 diff --git a/eval-results/arc_easy/5/ckpt_144/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84c2918122b1665bb54f7e1e9e3c25e947b2947f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642a6319cf9f3fe0b6029012225cd2d02331a9e5139f32dae8e25655150b5f60 +size 2703 diff --git a/eval-results/arc_easy/5/ckpt_147/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_147/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68eaa54d6e311706a994f2d85d2a583bcecf4387 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_147/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc03696c0eb67ab59b46c1a32a6bd12f4f07655838b5392fa4cc9b35a97e2673 +size 1330915 diff --git a/eval-results/arc_easy/5/ckpt_147/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..258c3913a24b0e86b311f3e1baaaa24c829716e7 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be4ae62a50f73056aa39de7de194acbeb21ebaf0836636a1e9d3740f34bfed8 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_150/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_150/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7042c87042874a47e416214e2cf186de763d1c5f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_150/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3937746eeeade4eeda5e395ff2ec9a5427ee433b8d08d259ac193d5ea87d6d00 +size 1331218 diff --git a/eval-results/arc_easy/5/ckpt_150/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbc847e38df7217ffc43134339fb9c53f8e1be6f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e72a678ee171f85eec0daed6780ec29375074e306ac08c2cab5d15aff38f0d4 +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_153/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_153/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66b8e763f38a7f2e26ebd41c256116809a815c3e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_153/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d62b93f5fdb97563f081626868bacd78bf8f6047ba3ad4f305ccce47ec12fe +size 1331123 diff --git a/eval-results/arc_easy/5/ckpt_153/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efa34febf627758a230b86df2f0be97b5f7fc2a9 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bdee8c898dadd0635c268ad585db1d80abf0b43ca2632ac869a5834aa7ad48f +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_156/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_156/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cae86c5e505fb1b6e913df522be95043859f2f4a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_156/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cd843fd5eebdd67f9f0706451fe60c1f37155506d9e6ea696d5403a32c34053 +size 1330904 diff --git a/eval-results/arc_easy/5/ckpt_156/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..048a28e6cdb3873616af91d6e3d2133270f50c97 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63dcdb4bb89aa560034dfadcaa82ba5d50c209f983d151de11f7275ea2fb40b +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_159/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_159/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1025d546b9287d8af08d8f7ed56f403f5a468740 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_159/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f5ca5ac89d14a0c31a8686aa3a6a2f8bebf3faf86951c9410c315db6c2befc1 +size 1331216 diff --git a/eval-results/arc_easy/5/ckpt_159/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2ed57fefbdd3802d4667d4fa042dc9d021fc1d4 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0684035e368ae99bac4f3db9f7e5922e96a00f3010c74449fdc3986772818f0 +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_162/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_162/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d83d00fc688d466a7c45815ba507978f1e885817 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_162/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64250d333480aeeeaf5fd0867134f3939ca13b15abac4e10ada65cdb509d18d5 +size 1330984 diff --git a/eval-results/arc_easy/5/ckpt_162/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fb3018c2cbbb154fe6d8d9fe6526f6c336e4b03 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe1e80d4ba7a7dfe0442ad7b69a3fe325c37bd8012c1081377021de19dec247 +size 2711 diff --git a/eval-results/arc_easy/5/ckpt_165/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_165/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..549ad3cfecd85f163d521d5863089b51ce1da0b5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_165/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa6b94f0f18cb1c231ef3a9a260f4f2b6302953dd7eb3022addfb186f72c971 +size 1331055 diff --git a/eval-results/arc_easy/5/ckpt_165/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5c4fb83c8e58cf6b2b3db72a3b20b3e8d2522c4 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a455129363bdd844b5fe0a688e8a0a738c7cd8e90aca74f6d5a075ef270913 +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_168/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_168/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d2a62e8bd369a22d071713c9b663b19b4777199 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_168/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325c55aae1571ae0e91925618c38c084733a86fad6292410d2f12387630d1051 +size 1330952 diff --git a/eval-results/arc_easy/5/ckpt_168/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63dec029b9558e00e2263258687afe0985b3697d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b388127c60088723e88e9b3762572ab174b3c7e1f4b2364672df39748f5ffd +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_171/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_171/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc15515ed07bb37fd3ccef5fbe9fe1854580cac9 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_171/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fcc810847739d67491e2f41f818d42f4cc0f5a6b8883c6bb83b08189c688bd6 +size 1330921 diff --git a/eval-results/arc_easy/5/ckpt_171/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c9223cd4aeedfd97399a8847823bf6cff7a6a23 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d602a58cd0d4cba52e0a3e808abb8f32749f4d6ec8becb309f0fe327012f202 +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_174/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_174/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7fbae11e9cae1452fddf316a121c5c130346efa --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_174/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065accb812ecc4f1f28b86c29eb1773e5a6c38c46232efd68bfd1d98845ab79b +size 1330969 diff --git a/eval-results/arc_easy/5/ckpt_174/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd8d3c703879932150aac55ee403443eb4a383cf --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d050299884ef40032791a94d9fef7381bef6a0fb17771e181f29a8469341b319 +size 2693 diff --git a/eval-results/arc_easy/5/ckpt_177/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_177/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..733a7ceb51834f4fdca7e0cde31d345caf48afeb --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_177/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393475db5a95b6085584b604922198eb7d21324d950e915d7a6f7c64ac39a9c8 +size 1331026 diff --git a/eval-results/arc_easy/5/ckpt_177/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79c9db183912bb205987d218b51688a63dfcfdf0 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faae9e69a0c443b974facdad995ef2dc3f03bc06ab2a203e7a88bb6c855d36d4 +size 2737 diff --git a/eval-results/arc_easy/5/ckpt_180/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_180/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fe4ea1fafe3e0efaf7421f4c861b81e315436fc --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_180/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f95893ff2b0082847f64845b1cd7a79a8c88242372b6d024e2c2c54b90ddae +size 1331054 diff --git a/eval-results/arc_easy/5/ckpt_180/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77a4823ddb82bac31cb138db6d1fbead9d95f6c9 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c4a54c856e7cf303a087c7177a362b787007204b5ce517235242466c3a8e70 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_183/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_183/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53261e7ef6975ef68c4adeea3d8c72c861717684 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_183/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc26e4e74b3cf164c0dcfa5910399d64ba6a3e6e3eeb4e352471660f77a3c9e +size 1331111 diff --git a/eval-results/arc_easy/5/ckpt_183/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec46e1a107cbe9d6b9756976e817bf995f1ca7b3 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8badd17db7f6525c900c6021c079206b95ae61b5e28dbf30cffad9aeb8c7b82f +size 2699 diff --git a/eval-results/arc_easy/5/ckpt_186/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_186/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bf859b3f8c134ead80b5ec69992401aae0092ea --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_186/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16aaef327d9ce4a87e4ce98e8441184d512b2efa7b2dcdcc2b89f832bbcadab0 +size 1330949 diff --git a/eval-results/arc_easy/5/ckpt_186/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba543d7ba21664a15264b3611720ea925baf96fb --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a911d4c16935914cd591582d79b050b33296a5f497b77eb5e7b941a7dd6d7733 +size 2708 diff --git a/eval-results/arc_easy/5/ckpt_189/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_189/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..973a0d5680874131838ba891b4618b5cc6df382e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_189/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a86b7c08cc32eafe781bff868bf591454cb3ea27e40ef8c222f68c62035950 +size 1331090 diff --git a/eval-results/arc_easy/5/ckpt_189/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d89e23592cbd433de938c897d68889289ccb6ece --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2b7c32b3e117176f61da4a57f593232423fa01303db9379ae9fb79dec8c1d8 +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_192/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_192/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2f9027868b38e7a19cb0ba5be104661e8f5bd1d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_192/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d4ca5adf377df5d31382366215a1c224b1a7d2b273b7eb668b8ba9c4325250 +size 1330929 diff --git a/eval-results/arc_easy/5/ckpt_192/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cea28cba88a1e85fbcc77d185969ea8b9facbf04 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d127727aa0acdb5cef6247ca4794d3127a17c0eb28e1aa18ac81b0703bc280e +size 2709 diff --git a/eval-results/arc_easy/5/ckpt_195/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_195/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a458073e2a559d88dd52d5129c0ef9e51e67bca --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_195/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93628198c70bf1eea7812bb10e00cdb30d87983faa17342eb09609244b3eedfb +size 1331337 diff --git a/eval-results/arc_easy/5/ckpt_195/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9efb86525a0ebfe1baf70aca7b16f2e855453f1 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e0b51ec7c04cc9a416d8a83c23763b9de90361e35c1c1efd8f1cc5cbdefa0e +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_198/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_198/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2cb22ce340247a1917364fe0960ab8a4f3cf4ff --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_198/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07eb2d9371b3302ef904f6cbb2cb220a190fa1d129b3961034bcad7faf4b6c59 +size 1331130 diff --git a/eval-results/arc_easy/5/ckpt_198/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74fc1720dca145c1256b646a775aa46531af0277 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0152f962ecf5d24a615a129ba75b8282377d54eb8a2d2c5767cb85c408a02 +size 2735 diff --git a/eval-results/arc_easy/5/ckpt_201/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_201/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbe39aed491ea3897ff2879cd03ed87cc39b1ac8 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_201/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce11e139d6b3470953ef362c4c5757c4423fd456c237ffb8832372701ad361ac +size 1330875 diff --git a/eval-results/arc_easy/5/ckpt_201/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d203d598cb703410428dbffbfec31b1347c8f10 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:875a7f3599d902d07b46203eecf6dd8d5370836bda81ab50f5f442ab5db944c6 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_204/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_204/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd864ab50aebc44732f504410b2f39b9354ce6a2 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_204/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b6351267503b08ff8c9c35293e3c1db5c0332fb692160b5f4cb81cacd6dc51 +size 1330941 diff --git a/eval-results/arc_easy/5/ckpt_204/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eba4193e15c7fd02966839e603cdf4a31bc0f8b4 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5279f554e3e019cd788398fef46ec312e014a3ef38c847669dc4fe47c767d42 +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_207/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_207/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ff6b3ca4addb60eb74252264b83554c189fbe81 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_207/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04e92bea28f1c0c2b18e2062d9b1b7560d3be374423e667efee79d741688046 +size 1331043 diff --git a/eval-results/arc_easy/5/ckpt_207/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2c0d93b88c115e0e2c8b9e131d3fc042249b958 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd0904c3606595e7c51ce004da476997d1f91c90f5d3452d0c56b5a328e4c7f +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_210/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_210/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6270c7caec7610ef2cde2cbe4fd41dffe685dc95 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_210/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52ef7749b341e143e828d962922c41eefe2aa23ec4a0dd31e760d178f788867e +size 1330877 diff --git a/eval-results/arc_easy/5/ckpt_210/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdfe22fa0f7e4dba4ddbf5c542111ef268748036 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04a549bbdbe36266ed302777af780cf32fa89262f760221c9cf92606d490414 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_213/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_213/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cae3935dbf45a4bcec684539c3a31f47d2c847ae --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_213/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09e97d562dde6fedc572350a09d94666084df180a4a55261a337e4d151abab8 +size 1331235 diff --git a/eval-results/arc_easy/5/ckpt_213/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc4691845d7cb0f1b8127e5df5c0ad15fe46bbdf --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1fd48cd886c13b9be76eafe5ae9b3d82e27d063c17b0cae2d09650dd0e8acc6 +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_216/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_216/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93518437ea214bde5874045c7b0e8fcb5c4e95fd --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_216/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68df248dba2587a0610fa0da69d4c765dd27057bbea8fdfc9c684d19df21a21d +size 1331062 diff --git a/eval-results/arc_easy/5/ckpt_216/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ab313b449848faf5dd5f30c806c89473f88d434 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c1c06770bc63484d614623f393bdcdcbc80b128635e8e9fb35e1da4b41fc534 +size 2737 diff --git a/eval-results/arc_easy/5/ckpt_219/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_219/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a8a701408d54e817be8494aabe34fa74f92bb5e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_219/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0850e291bedaf4032bee4311ab2147499e411bcbd357368b025c452aef8157c +size 1330962 diff --git a/eval-results/arc_easy/5/ckpt_219/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6b54fc843a9934b32502397e57f3f24d6a2dfb5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bdd54238e3249b196e87b431c0d591042a8ec15bd64a697c1f4f7d98b191607 +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_222/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_222/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7250607569705034dd0bd369aa9c9f92d1fd799f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_222/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae5802112ab35620ce1d63d2f6e364b534f9e872eb454ad0fd0c7e807d2eb1b0 +size 1330928 diff --git a/eval-results/arc_easy/5/ckpt_222/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e162833cb80e7eac3e48418d4997d90379d13f9 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7c442f29d99b53dad396ddc2b3bad981176cffe7de2fde6f96b330eadb999f +size 2701 diff --git a/eval-results/arc_easy/5/ckpt_225/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_225/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cd9ef37ea0db71dbee0876ab9212b52bd697b1c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_225/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797aeba1a2b0d1e4a2a2e5645f0c0e38460e663205d32aca41fca0f0d07cfe3d +size 1331045 diff --git a/eval-results/arc_easy/5/ckpt_225/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c7a1337b22bdf85159c9329255a6c393cb17015 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7388452c94af957061f9c7ee49d18cd6ef873fdbc77c9ecc14c7ddb852ed55c2 +size 2700 diff --git a/eval-results/arc_easy/5/ckpt_228/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_228/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33878c0e55c19c7cfc59a25138fe65b8221ef981 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_228/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:355532589238dadb1067ec4e7bbf13f4db9ff816bca81efaa8640cc2883479ae +size 1331074 diff --git a/eval-results/arc_easy/5/ckpt_228/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13818ef937abb78abd72d749f91f67d11bedc348 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d06c9b4a44e54016ce679bb2cc3b80a00fecafc207342a6feedd583a43414c3 +size 2711 diff --git a/eval-results/arc_easy/5/ckpt_231/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_231/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6b54c9d0bf2f178d13b47031f646d05316e8b3a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_231/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7305b54085cf02fcaf711fdd462eb74c6b1396d6fee0f66ec69965b075bc6e +size 1330987 diff --git a/eval-results/arc_easy/5/ckpt_231/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38d1f9a93e47927dee9be11edceba88d6238f0d5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c465cff22f8b9a4efbf39af98cd8b6e54bd459255bf366843001263b96f5c3 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_234/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_234/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c24e1b735c19c4bc042879d3ec811b824861557 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_234/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced4d77b49b3934e7c9e8ee40aed4f5a7513fcc546f6690ced33ba568bbadbf6 +size 1330961 diff --git a/eval-results/arc_easy/5/ckpt_234/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f346fddd908de540f2aa5e37aa2bf5c447a968b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c8d12dd563739c4882c2ce336f3bdaf520e1772b9da5f2a55818b18c023280 +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_237/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_237/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32bb1a51f5d47625f2d14be987bed1154e957777 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_237/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bd0a6af24779e453865cb7bc340ed8b5f54f46bffd31d9307beb6c80023d61e +size 1331009 diff --git a/eval-results/arc_easy/5/ckpt_237/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4fdf940884681ddd03b88ae34413ede9ae8093a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bbbb6ebc9c32d891b5971d11dfb376e8a53b3915eb7719632bc1359fdc818e0 +size 2699 diff --git a/eval-results/arc_easy/5/ckpt_240/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_240/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30553748cc1d3483b9373dbe43b913b277ae5133 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_240/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d34da3287c0c430baac11ea385002de1f4bbe495b754161e843990995e147acc +size 1331064 diff --git a/eval-results/arc_easy/5/ckpt_240/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da77e6521ad1ac07b9151870ada73a5cecd6e91c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b231cca5c8a6a9b888d3ff87ade7e1882a2c6d6c3ca0ffa77490abdf558185b4 +size 2710 diff --git a/eval-results/arc_easy/5/ckpt_243/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_243/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..926ea0411e3451215989ade7ac137e29083a4ebc --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_243/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61ca6dbff32b51478a3a489c099bd8841a6d9c01990d33d23c76a71955d50db8 +size 1331154 diff --git a/eval-results/arc_easy/5/ckpt_243/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d9d9da815313e24bbc2d0d63cf31b9c3ec0f1eb --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264a8ec9f721beeedafa12168b2d897107524dc86272f43f5170b661720e48e5 +size 2732 diff --git a/eval-results/arc_easy/5/ckpt_246/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_246/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50ba6b8cc440405707b61e49f9d90d5c2433721c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_246/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4b5d9326028619c289de8d6539c15bac7d705edaf803b5bddbe8d8bf7cc66c +size 1331111 diff --git a/eval-results/arc_easy/5/ckpt_246/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c84a17aaebf52c0452b98feb0a104df9270c65b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639c00464fac5a6d52e1d4f72de4c18d50589f1953d5f9d77d6bac0d40d9b850 +size 2701 diff --git a/eval-results/arc_easy/5/ckpt_249/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_249/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e040ab61213a1cc8c067c393bda4cf64ced3158f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_249/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87496de6603508eef697d0591490bc9509f7d5db191e832d956663ec446d18f9 +size 1330958 diff --git a/eval-results/arc_easy/5/ckpt_249/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce6edd2ee1d5eeb2129e9b3cc727a0edec2d8643 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5530c58de1167e666bb00ce8a1582fc72f051861286156e040d14193b7a73d94 +size 2711 diff --git a/eval-results/arc_easy/5/ckpt_252/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_252/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b7085cd3efbff1c2a622a49c0450e3d15ba38ba --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_252/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53daf1cd6fe80822beab355b546b34a8195097bd41eba4b59d7f0c671e97d83f +size 1330921 diff --git a/eval-results/arc_easy/5/ckpt_252/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ac4494b9ab67ff37c559bab40b80da3172d566b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c8cd4b760c29a53dd210579091bcdead6c995853f32e15f43e86c540ded5b35 +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_255/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_255/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa21cd7bfa688780691a62bcd6c5b2dcd093bb2e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_255/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2403df430534e7de09b979c0cedfb57cd209c9c8dbe7470ac078abee685a2de +size 1331030 diff --git a/eval-results/arc_easy/5/ckpt_255/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae13ff4c4e42e1ef329d2b58daf9c4c616def984 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d3f9420602477d0a22893a8109e63f349933770c59f49dec2f570cf1cb29996 +size 2698 diff --git a/eval-results/arc_easy/5/ckpt_258/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_258/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdc88714b93191767a32d2604bccc1c851cb2ed5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_258/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d800230d691bbc103e9ad21ba5421722374331e34e9fc11e76acfcc1330dc3 +size 1330798 diff --git a/eval-results/arc_easy/5/ckpt_258/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea3a591c7e496171fb11b457a3d3e1a670ca5d38 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4774310d6e9be285f89a00e1f4c974bfcc1a4f918347b2310c48fe3b59e95ddd +size 2709 diff --git a/eval-results/arc_easy/5/ckpt_261/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_261/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cf50b985d5e998367ca98f98816143d1941977a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_261/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1f49ceb40b50296260e7b13b9eaf3fdb773dbd9d97bf68ccb28ce298440ae8 +size 1330978 diff --git a/eval-results/arc_easy/5/ckpt_261/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b40b53e8582ec2d51ac80a9982acf78b4b15515 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103a8e02c2af6e54f908d4eb92f72932571b4f31381ae5daab17386901fb87b1 +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_264/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_264/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaf4aac47a08e61bc34a3b816bfb4b40adbd588e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_264/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4af5e4cbea7596c5c01a2c719947cc09d079bca668093ba33b44dea9bde56a1 +size 1331031 diff --git a/eval-results/arc_easy/5/ckpt_264/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e03d63e685bf6d45b44fe10bed670c3031632d70 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a26bc2a363ae9933445c8fd8fdb5208cbab9a753a50129d2497046b4856024 +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_267/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_267/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60ffed8fd1ae392dff49893843a0d78e7fc33953 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_267/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7523be4817738a00e16e049031d3a3696dffd12622a7fe5681d7c7969212b2aa +size 1330892 diff --git a/eval-results/arc_easy/5/ckpt_267/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9416b242a9734ab1fed584bb649f15806ef46cd8 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67519b03c6e0ce4fa7158b61c767d243b5f1a5c14dbaef2ac4e164beda8a8531 +size 2735 diff --git a/eval-results/arc_easy/5/ckpt_270/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_270/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be5f6b87bdb180d5a9c65af66686ff1df4f64c57 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_270/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6fcd448ec88a09cadf8f7c7119778599d963aa8f2374768ccf21cd1eb30f78 +size 1331002 diff --git a/eval-results/arc_easy/5/ckpt_270/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c39e582efc41d169f8590d1244a2ab5d96192a1 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4cd8cbfac52181faefa943f33696e96f2e4d22f1be1094ff11c6a427c542a98 +size 2703 diff --git a/eval-results/arc_easy/5/ckpt_273/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_273/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a89f050cc4e772d73bcff1b26626137f122f1e0e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_273/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3bb3f4e8a30b27499c7c52056f8d566a59d7287d3759ba2a920691cbe4e9fd +size 1331004 diff --git a/eval-results/arc_easy/5/ckpt_273/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..583c41b8322eab7fdd6780b913f8c4771c2cda94 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c990ead6f8f5ef596f1bee4558a3ffc6c28e4d23bdf080004b77200e971e0a1 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_276/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_276/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a40358936b8e0842a37955a92b7d41358b15c5d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_276/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13f624f42ad3979e7d5d10c5b77e932032610529b7aa4475eca70e46f5a3c037 +size 1330986 diff --git a/eval-results/arc_easy/5/ckpt_276/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3aad21ededad55bd120b108575ddd9703b8c028c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f36fbe7b0669a510609f37bed8c57c66ca0a329e0d2e1141a84ec10e9527725 +size 2734 diff --git a/eval-results/arc_easy/5/ckpt_279/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_279/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3692fbcf0ebbe30dd8a8aec3b4fad5ecf0e732a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_279/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df543fccd7d6bb5dc57aa23e6dc85d461334f91d160bb65da200764afed382a +size 1331154 diff --git a/eval-results/arc_easy/5/ckpt_279/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d12a576b5d18db34a1697f1aed8606bf2f1413be --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128df3dd2a87e4df268212771f8012559fe7340a2c6386ef01705a64d7bbd4eb +size 2703 diff --git a/eval-results/arc_easy/5/ckpt_282/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_282/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd5eadbac3e86cb98b29cab1a794e2134c1f629d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_282/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458a9ddcf7e1877e172e7f78e60bc4114cbc22917f8846c1ed8cad03e5ab1190 +size 1330917 diff --git a/eval-results/arc_easy/5/ckpt_282/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec9528220a561047d6017d9ff960315f28f58cc4 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e06090304b7b9d6fc170f74451090db1eaac006b8c34d3b2d5b59dae3bb2ba +size 2709 diff --git a/eval-results/arc_easy/5/ckpt_285/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_285/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53b9655f6533f250d28491126073f87c01528d67 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_285/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6bb95a1b9be6ace442e20284f4e3e9e0144a3a68e8913397da6ba4493c89dfe +size 1331016 diff --git a/eval-results/arc_easy/5/ckpt_285/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0a02093b258d8eb527d522aaf7a2f3886b96929 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17fa749172cac879dc4113b47d04de959258f896c43effdef5256db6583d76e2 +size 2703 diff --git a/eval-results/arc_easy/5/ckpt_288/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_288/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..075b10d7bf8fb622ac21d74fa894462ed83165dc --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_288/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba9bb7ebf24f3dc215bc4ba7d6f825af4e7736c246fb9e2b4f604c6cf123e76 +size 1330979 diff --git a/eval-results/arc_easy/5/ckpt_288/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a0d7c8278908cd8905a4d9241758662af62abfc --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4c0a5295a363a404faf5ee6776c71d93eb13ef29cc1d30ff11063f465ee896d +size 2714 diff --git a/eval-results/arc_easy/5/ckpt_291/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_291/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fc3e5833acf428efbce02fffe9f2693a3196260 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_291/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e95b9cb7369bca045d9fac89adc8d8dc840fdb941395d9ef26d0af7b724650 +size 1331027 diff --git a/eval-results/arc_easy/5/ckpt_291/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af8c06cf6ae26233411fa9eeca4403d595f46f3b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947b56badcb3bc45224524ed8a79a977af4d91038ed016b8ce0d9cb7a8e3e83a +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_294/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_294/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..935967855511728ffd9e58e710d2d02813524c90 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_294/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21a6a63bceae4ca25340a3b83e6326920c42544f8e6adac47b3b9d68772fe65 +size 1331057 diff --git a/eval-results/arc_easy/5/ckpt_294/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11b5aaec734aa1d06d0d4c7e928d49b6d8b17966 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f23f257e50a64d2ead5d17d9fba76c31b3e78e786e9eac1689a59a53667b0870 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_297/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_297/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88855f43d9929c828dc553e776bc291efbc25698 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_297/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e544307e415a5af19ad493cae202c3b73fb62c2bc478f2a19bb6278c946d4ca1 +size 1331209 diff --git a/eval-results/arc_easy/5/ckpt_297/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d70b13f4d345dd5d057f04d4c6dade4d867ccfd5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3491fd966774e20616dc909cdefc743d8bcb0cbec267c6aef7aed0bc18eb3ad0 +size 2700 diff --git a/eval-results/arc_easy/5/ckpt_300/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_300/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df5eaf24f080216eae937c0f151c4d402697a7ef --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_300/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1eab57412c0dc21e40fae1fff5bf616f5cdc4aa9ce3b93277c1f0511636a1c +size 1331027 diff --git a/eval-results/arc_easy/5/ckpt_300/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6a00951a355c2db6712177e8d25101e5db490b6 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f12be25b53aefd14b93b581b9001449f888b7a9b1229a9da3712755a3d00044 +size 2702 diff --git a/eval-results/arc_easy/5/ckpt_303/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_303/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46b75365233dc47f6f50b7ceeda64a9c61f8039a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_303/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6106901cfdb3b1b510865aac94bb4c6dfd72a817b317eca65efb1da6a7d7ed +size 1331091 diff --git a/eval-results/arc_easy/5/ckpt_303/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe21fed9798608fce4c2a939b6c69d9472a3706d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a8ff247c119c53d18a9771d144fa9b340055415f1018abd2ecb6064cfa7b38 +size 2701 diff --git a/eval-results/arc_easy/5/ckpt_306/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_306/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9131a215a27010385584aafde9c54ccba59adf2 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_306/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0631d6004974697886868195e87dde77b676ff62d097531b7e0fcd4bdf594643 +size 1331127 diff --git a/eval-results/arc_easy/5/ckpt_306/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7b214450bb3d40adaaae94cf05269231a0420b8 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8558ca2a9adc69fa064bc29a8c4e1b5a88d035ffc7f51d9aae970ccb1bdc73d +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_309/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_309/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21dc577df3a2ebd9eb6ce6a01ed11706fec45fa7 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_309/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2784772c80c107c03ad001570d7266536876e2daa25006350a4e3cb3cd0a85d8 +size 1331200 diff --git a/eval-results/arc_easy/5/ckpt_309/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d87caa65e733f000ea9d1707231626f89833295f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c464a2d5403e58e5683dcdc91a6fc62d62c40f55fc45f0a2a37ca09d4da922f +size 2709 diff --git a/eval-results/arc_easy/5/ckpt_312/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_312/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4af52c806609e4090cfae6ccf62ce232b1e795f5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_312/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bfedb4d9d3507a4b54a6b6c2b1b40e4126f58eb813b9186a4919f30284b0571 +size 1331229 diff --git a/eval-results/arc_easy/5/ckpt_312/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..471d35aea45c6e118637bf7bd75ba5c0f2bf26f5 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fbb85a26d06428886c53a6428bbfcb0c0a6690bfc019339ce8e6771472b9162 +size 2703 diff --git a/eval-results/arc_easy/5/ckpt_315/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_315/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5a86b4da37e1559a9c070913728066f93fcdc5a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_315/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38a3fa23926d1b1fb33f89f5e09cd1e8f803857915baa4e3f788d84fd150141 +size 1331042 diff --git a/eval-results/arc_easy/5/ckpt_315/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f990c5f40c61dd61c56ebbe822c5d7062f5c75da --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19083ba2325c6b4e5a588edd1e173f12ac317d1d97fedbf62cbf5546ede719db +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_318/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_318/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..792d481527cee76b7c824df928d5986201548c5c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_318/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327b395a15d7417e2b6c1264234c6075bd222a3c3e586de172e566436676f374 +size 1331095 diff --git a/eval-results/arc_easy/5/ckpt_318/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03f02916c775645fb2d0372bcf228392231b08eb --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6848711b2e1b6173caa0ad8a885754d0967979abcb46d3594865d9bb23f29c +size 2709 diff --git a/eval-results/arc_easy/5/ckpt_321/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_321/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e20536ed0e6e1e7e819056e15789b0bc55c38d91 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_321/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b3ed3ecd4f0a7f9cf466c6b9cd6e855d9b646df0d6cf12964245ea6020910c +size 1331137 diff --git a/eval-results/arc_easy/5/ckpt_321/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d088736fce6a44e103f7eaa96c6c9336dd2ad88 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b51eec4dc942df1e6d195b8b2f54e197b85889542917cf6fd20d7823423e366c +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_324/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_324/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40f3bcbe6f83ff31ea2d10a2d0e75425bedd6f7f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_324/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f04feff30fc457fedb1534bab152cef17a0cf01d18e9ba73b7ff6fcf441fb6b +size 1331044 diff --git a/eval-results/arc_easy/5/ckpt_324/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9faee2915816ffc507fa479dc690cee6c2b77b8b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e17bba970dbf1b6bb91cdda21e6664c4eae94b8ad9fd6094d8c8fed7b20ea9 +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_327/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_327/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9c88669f2657718343a4b2e824197b81ccf2e63 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_327/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60de3f676029eb368f1a4b85bd034d19a5f02e28c564f27cb42ee417f8c4ab41 +size 1331080 diff --git a/eval-results/arc_easy/5/ckpt_327/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e550f176a163fdeae74b547d8bac3182a21fb5c6 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a08342972eb208a1be5a12eaf8d1c8d08e3cc209cb5307b7267dd366c45ee9a7 +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_330/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_330/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee9326f9cd5d1c39725d1000bdf72ddfc64de50f --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_330/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe41c4e0817c85e974bb1ea92b904272bc80c56550364797d422667a4fdfc2a +size 1331109 diff --git a/eval-results/arc_easy/5/ckpt_330/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c5d716a33814a7ec7e80d5fc47e4c60c04d08c2 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b019923e9484654c7f4ed23f7bac4844b2fc7cb389c2efa023cde29a4138e3bd +size 2709 diff --git a/eval-results/arc_easy/5/ckpt_333/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_333/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2991abb8f93d6c727ae2ee7ed79a45d4d928ec68 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_333/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dce9a64706caa1ee8bbbc9becf51a6b5a08d6a99a1ddfec9867adb59bb719bdc +size 1331131 diff --git a/eval-results/arc_easy/5/ckpt_333/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..489290009dbb8abcebf6d7d8b3cd2f2418cc7c73 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6a3727e63b90a9d789419d13c0e69ab7b28620647dec4a5204349966c1e92bb +size 2698 diff --git a/eval-results/arc_easy/5/ckpt_336/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_336/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d15ab103b699d28f299393c133cd21d7e28faf6 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_336/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec53f09f50ae4b5c3d0ea38cbfa6ab612d33e77c0bc513390cb0dae675469d14 +size 1331051 diff --git a/eval-results/arc_easy/5/ckpt_336/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db266edc6b2d332cd0c13ee7a382774f0b0dd18d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec101142415cd9b29821b774a50fdf15f1a895139280c4fd41aa1d5f56d82ee +size 2705 diff --git a/eval-results/arc_easy/5/ckpt_339/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_339/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f11722b7e3b19847f95406bd2457e987aaca66d --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_339/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275f4c5e2d2adbc69539407a8aa617d3bea397513010f399ed1f0c699cf6912b +size 1330960 diff --git a/eval-results/arc_easy/5/ckpt_339/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c4a47dbb46c0df3081a5a3f04e3af9f5f3e5959 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e658d56d815b6bfa6be05cd2541497e36dccf1d79417d6eb82fc32e0928af34 +size 2707 diff --git a/eval-results/arc_easy/5/ckpt_342/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_342/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ac0078fc025f0eab73997043463ad06f29e615a --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_342/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8359d785ac0ee3f87a42884690688a8991a6dc4116f9c5dda6c18faa96f117a +size 1330934 diff --git a/eval-results/arc_easy/5/ckpt_342/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2315d493507b35a05fca48a4362cd990fc2eb92c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c507a3dd1ecdbcaa955269fe5456a597aaac942ef6e8b62dc04136678d8ba7be +size 2701 diff --git a/eval-results/arc_easy/5/ckpt_345/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_345/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d58c9e68a6a4751919eb7decbab800f00e352c7e --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_345/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a81e1099618ae61ff2a7185c3a0ba1fe05f9a9fc6fd9d52e7f947887e738103 +size 1330987 diff --git a/eval-results/arc_easy/5/ckpt_345/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a131e0e5d215d63ca6eb7e6ae45c1a8f586a392b --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8090e9525df8c2602f302ed3cad4e6141669b75189d79d49bf40b504f428340f +size 2710 diff --git a/eval-results/arc_easy/5/ckpt_348/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_348/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..965bf2bf62ec41667ba2ae62c2fbe2136c38f9c1 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_348/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5ff1897561ce72caad06ed85b39550c9ee410df77fdc85c156b48094c99e1a7 +size 1331170 diff --git a/eval-results/arc_easy/5/ckpt_348/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e7753317c650a1b4b08033067b55f54a56621f4 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64557ef22cd26778c6d1f2ba430b15796e42265a1880168de3ee8117dafdeed1 +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_351/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_351/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdf8992348ec5136262d9a03255e280fce8e6469 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_351/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b88216e6527f8b411bd63932fc826e73d6d629717cd48b48c5d1d2a2e422197 +size 1331110 diff --git a/eval-results/arc_easy/5/ckpt_351/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6df59f467206688f2cad09c48fcbc8c2f143443 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f78181a95788ccc47a21b755e30905dcc8ddf72be29cf9acd352672f5c340d4 +size 2704 diff --git a/eval-results/arc_easy/5/ckpt_354/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_354/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04ae2edb60cb58c79e4806299beadac191b36abe --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_354/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0246047137cfda95faee040d2bac6b88dbcb8e7442310b2b481a465a50fa3883 +size 1330964 diff --git a/eval-results/arc_easy/5/ckpt_354/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45ff20bfa6d27e4acc487b52337fd975a4d9f86c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e707093afdebbbe2a07d2ef3c7e681a679c9f1b0e2a02e85eeeb35995145323 +size 2711 diff --git a/eval-results/arc_easy/5/ckpt_357/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_357/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4d65509fd3d812bbc6b6b6d131fa27cb1bd52b6 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_357/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b1078b5abaafb42241e6bdff0458ca93f6efee1a5b4744b446bb7aa927f1426 +size 1330980 diff --git a/eval-results/arc_easy/5/ckpt_357/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f86dc5c7d01f4aa5f0305aa4244132dfb2ecb28 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0693bc14fcc0e8b768b6bc12b415c9eb8d4987f5c103f576f59b15261082a0 +size 2706 diff --git a/eval-results/arc_easy/5/ckpt_360/arc_easy.jsonl.tar.gz b/eval-results/arc_easy/5/ckpt_360/arc_easy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..067b6c0618f52b71b321f03e9bde3a9f03568295 --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_360/arc_easy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313bcefbb8d00062f44b54319f4bd7d2d4ff923e23c9df5a87aae96a69cdc254 +size 1331206 diff --git a/eval-results/arc_easy/5/ckpt_360/results.json.tar.gz b/eval-results/arc_easy/5/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29ca1ec19da58b2ec27de86600e330f75791ec2c --- /dev/null +++ b/eval-results/arc_easy/5/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0397bf4dd3ed807e3db6b2421a2beb3042ccae7fc469eec0454e3a79ce77b523 +size 2707 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da291a6aa645851c2afae929754c5968decbc68d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd9e302d1d5ea14e1c46fe6f8a6bb7ddd34873038fb06249be8fca859ab44287 +size 13167 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2cdb51d57bf3b28d2d03e098c866e1d94da63cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5960d595082d4234e7d29c08ba0dc2e418ef326990b517c0b5e1c5ff110486e1 +size 78245 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad5d4ec4c18c29071aa5f82ee78af562ecad7bc9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36bb8b6d31d3b0c556db45408b21e28c18318889f64afcd65378c580b89198b2 +size 23101 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fedf4dc0537ebd86d3d5f7079e61f2e0b49934e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b2649930ed2250018667b7c42a06d4e7440328ce6bfecae4eade9e6c4cce21c +size 33349 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..155d853a335efcfa94c42e6a976d65908552ec3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36bd732cbca7e2c4584a6246408e5d04c934c2558fadae1b3ad2036d984bae26 +size 28627 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15fe522931355787fac3b8960589674f522dfbd7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad71ca1c481c059e67978beb8675116ead5a522b4eecad818f868407170f08f +size 55232 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d16b3211faa346fce607370dabbcc496486383ac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9652b50bffe3443264be12fd6ee0c31b8ff7c79765a1bde64451df4209da75b +size 34597 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..300d651e43ff933bc2bc4d454b58cf667507ad04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9135158796808d2bb99de094cfe31001e32b6acf41b8d534c5411c435dc507fd +size 44870 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..974e4eed548fda46536608347f1fd0957d9e5bd4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20d73befa6c2a3f011e1cf7ba6e0557a1b8e6a75a459fc06d2d5782b28bd2700 +size 67925 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac54096d615fcd0ca29d174db462cbde02002181 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba355e7594fea7f9d93d55012cd7d7a9545a227f99474f520f2eac5746421ce +size 79495 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5044f54591993a72b847b2853a69a814cf186a11 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb368928180920f2c9316c668c0a23f83cb53d65977232167780e9d51653d7a +size 39207 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cb0ade1acab9dcc18ac34218538048804ed0086 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9c2873e17245fa816ab04e41dfedd8b6a657ac01eeea0dc946a96c624aec180 +size 37409 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56f4427f7d3b6103fe75c2a7f0225d2ede1f13c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455f82307d068c7b5943293737dabe1082c0adc8d7dae062f34300a9a7b9831c +size 17936 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed649d2273c1d2d87c6610fcb528728d8d569569 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48cedeae649b138e936303e0b9d891df24d6515cc0bc6ed14bb5cb8e336273e6 +size 12412 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e7f986e3e1f1bc1b43bbb7a3064fccda2b05218 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90334f7c525b39707ef0b3ef900da7b039560868f4a0823105ac84564644412 +size 32430 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba0d1d361baa86d38e98d575dfc759d8bd6cda00 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833736093bdfb5e9bfd74ecdc11b5bca56eef0066c43d602bb4099c4ad1888aa +size 11096 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7de48109060dfc89b013f5ece81a44bf1a2c0d05 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9801554d4f95f4f34de1ec0611a8daa5357bf45ae946b75bc4688980870c56c8 +size 30708 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d66c31162da7a6cbf2819c8b2dab5ca2e092006e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79cda48f5a44f62d9bf57101cc1164bb594d52698a7d5c50be92ca3b2574f3ea +size 29673 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e6980e73af63ef7175f108c192b793e4c42a8a3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab8cf30c3977dac46f3b718533a98eea1e0c918e770636f7e8031aa877d1b26 +size 52898 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee2256dbe674438ae45e216b4a2e72e6f94cca15 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb7209048a5d62cc5b413d079b09d3dddf3bd6c1e3af61949be42ac95cfc0c3 +size 22422 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1efe82365bff7cd6a9a2f6dac4830e99a6eb42c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76bcdc4c9027f8a9cb3ac521aec0e6fae619c6af16b4947f8855b79f5c58ba5e +size 12821 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f81b1174a35760c8a9c424fb333e49224cfc753 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e6fb075113d4608cc55ee6ef622ec5915c92e808c6f1695527f14c8248eab75 +size 38952 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d9e6b90664afcf50f6884c02a8515b0eae37dbd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f2da76fdda247368cf3055985349fb8b72ee04fd19cacb487e94bc4bbcec9c +size 50958 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ceb59b846864bc5a4bbcffb41c3653367c7df430 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4519211e81b22e40978a3006cf280a42fd9a344f713f98002e9be3ccfb75adf3 +size 58565 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24f0a5492d7a160816cccb0f00848bb28cc85669 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b7bfb06980ae19823e32d219f5b39a8ffd63f4fc7ab777885a6a95791b0cdd +size 38371 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5981be8b8904444c18ecd9d5f7cb967f9c98a181 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a79d1a982ad30176f576a4c62f74e2d50db41598c09c75c237e2e9be76574dd +size 47392 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18c7b29315ecec5e5aa4bf146eb752e93a7af268 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0a43622001b92f7ea324877322b45b74ad7c08fb53f4be8d2ef6297f8ad781 +size 45914 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_003/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49066e895f81a1c8cd194372f53dce422764168f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0216bec0fdce821d4d6aae755d9eabd95c65be4638a055c130d713cdde7d5618 +size 20901 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..433d754661bfee94e9e0a73abead2566a06a8fdc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20a400cadde8d49f5b216f19123e90a28c29a27095a54e57c88626a9726b146c +size 11011 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5df756a82ea17dee89aeef73ae91ab11463a09cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb4f3c7399cb1edf5b670c85ebff35c32fdfe1cc04c65135185bc55f76a7be6 +size 73861 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50169e31f0f1cb8f2a3d2d7a23c2f1271df7f844 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68bb0f11514de536a5f4c87ca03d4e757033b1ef5e93a12ad963921075ef33e3 +size 25485 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ff4f32bb1cb43e73094c499ee23a2bea23a53bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e57fb5f1ec63f3ea5b7b83b915b44edf7409e14b22e580cbdf5d7f5eb07458 +size 37763 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..238fc5c16a680ef27848152ed6bf6060705afc09 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf09917fc4d14f8e6817a419dc85ff8508aab15b7cd4b7814fa6d9565a6d682a +size 33432 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c2bd57ad6960057e5dd53c7ae0e2014ab318ac2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9cc6d44a2a36794b055c6d9464258b7d94b4a8f54a40b5ad3412d624380fb29 +size 60838 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be5bf58cfa1cf274448bb7fcc4cd6e873b1e25d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29dd334081f811928839edb89eaf80822a9d88603f79ceb89be06c39c96f768 +size 35917 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61ff685b68e5859c1bbdde98ee3f46bbef71c6bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bffce21c6e2b3ebadcd36e72e4d5c22e64054414ae82f3a5ccca0fc7916f860 +size 35304 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab19404364a329154e040ce73f1b248d1e653cd6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ec0f71732b8ed92fad8ef18ef6f75ef155e042c7679c797dd3344f1b04eac4 +size 41237 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f03bd88ea29c233cad2804505bc98b695b207aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0188b52f2acce1b9ae069874165606cdf207ac97e907ed1c580ca40c1d12eed1 +size 53461 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2fc3252811258cbf8e2a6753c19f86230ed4eb9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13176f245476ec6e99428a2aad616cc200f8ac07a6313367b78180b174cd2f75 +size 31428 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2bab93de57edb0d064f6e043c2aef54dacb80cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52861e658d9f713261dd624ad5117615e23d4602f771eb26f6686d1611945fed +size 35865 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..299ab55731cc9a2220d4bd302e0b93dd1abeda3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9050d6dc293fb29bb75959be6c201782d64fbf8c13fdbefacbf2bbe1575c6bf1 +size 25263 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecbbaa7e904019d8c309db6275c4a56d26054db7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18517ae61ec38bf929ee0bb95b5c24c830a69d766f7ec4896f8e004661c7fb9c +size 18013 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e52e0e7e5fb8230a3ff3d043bcae3eef181fc36 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf5c370c248094f671f9e3f0e9fe4983c0c6cbd7203fb4261732e66afbfb968 +size 19874 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70cb562d91135c84278ce729b94724b6ba40e25b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac5fe304285d8b37cd8d660a53c06960de3ad2fb7e9297a43a3e201e85a2489 +size 12767 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..effb04a517521032315fc4b6f8d380a49ebdfffc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa7c76996d31ad8d3ded0923e8d91bdc7c2eea663d6575593c41f5621389e98 +size 62232 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d50407aca9a235b53b3a1c18d2c26287ce33e3d3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2458fe0404f26179269307030c41ec0b62d64bd811615632cb9a78ef35405474 +size 36541 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ad04ec5b35ba2b42b6c9d97a6a33120c9fa9e63 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7628bd5dcd1d452b6e733a741cde694a7fdf1f20cbd4e57e7fb79a803e236897 +size 53728 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..044637d571eb9fceb8a2248ce7631ec1942912d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10b2a108051dd6621d7db4766dc09b22b274d3543a713261db883ddcb0d9930d +size 24687 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78cd51d77db3314becfb54aa73c7286af114552a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30d7baf373d19b3b5f4703a932d137b5bcd00259a491a0e87a5a04a9974b7727 +size 14631 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edf081623e35f595f69229e1785c879f813fb94c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267dd622d5749ead748a316831608c2a27c3fddb6aa6c6ec59ef3fdcf52d0c07 +size 44965 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fdc124f19465ca4b665734b8df9e4fb0ef9b83a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee69bbf51a1755a412c0ff18c3e5d7b5afd6ff60946c320016777492e756a322 +size 46314 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff150efad3b304bac34e3d08b20db7cc4632102d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3de439611f353ec762572c52671d839357a5ea00a750c5700c44a0eedd1a0a1 +size 64775 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60fc73e96c4d218cdd759654121bbbfa46d504b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71bd43636f3650099d091bc8276f052ce63d88692177eee4842e787c44341c9d +size 33214 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24c475c0c7d576ddd3fa155ecaf5b2a1a8bc3d38 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:444e9d55bbe1b2af86cc1101eb7c4fbc91d837522411b1c36c70f2ce21c2abbb +size 37495 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b1121c2f1d6f7593b34b0245b4ad88297f701e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5c58078772663c4ed213e57a31ab3ce95951202990b6a8f3cb2d1dd23be58a +size 66837 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_006/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..397a4bac2f37229a1f55b5520a14beca93663f39 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58c01f50401b67f4f892aabe5b040e6ea1b750ffc49c864e984e13267c50b77 +size 20988 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4725f10e0743bd4e4e9cd64a17ca034af9144993 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:602dbec31dacb3764553acfd0786081f444e8413b34a8e5afaa389c8a71a9219 +size 16581 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d8ebce81b8134c37492d425f410eb357c63e06e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64575ac1cbcefeb73b6edca7bc40838427a599b21e0d71c0b94635024c69323 +size 73983 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97794abcba0d8f98d78d0b87e27faa8df104c3a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:304b39385210c96981fec404a22bd7e8c1c100b45f69ad2c5112535123b12731 +size 25726 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1b01d2d70f39429adcfed81f48555ba7d2488f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0b4f1d4c85676c5fbb3764c0bdf2d442d8f84b72a95a61ad468d8f6a684717 +size 41385 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7c0e8aa927eb81bfc5132ffc914f3866eac9430 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44aec8e54d74bf9115d783f6cc5a24b27318f52b80fe833d50080b3681569d90 +size 48997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4430ac43d2dc4e8d782d60b470dcc96745fccd0e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9205b175f662e98069862080e269ef25ff2dd378edd856eca0484793e5a3db1 +size 60400 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..420ed91d30f9e98880bf616910e0004f72db9b60 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6839d2781d3ee34420b39ef781c5ba0de6297190a1f689965fa1b26cb3054700 +size 38078 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ace701bdb3d06a8f01ef46003a765bebaa7ca0c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c02a832240b9e5ec373537b78425957fafd2c7eb9954ec45c31cc54a3daff44 +size 30049 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7287a994206970a216f6a459d82ca180d06461ea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f93adc6acbe31e49f728d525a03d7fcbe3750bc5d35bb10dfe6a66d04c38be +size 40636 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5aa66ab0f742f7375df75562bedd41d6e7646b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed6f8b8ad4b9d1c0c7840b1926af3547df1cd120de007e9220363796eb8aa7bf +size 49970 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3133c66b1d5494a4c446ae0a486364bd5ca4f25 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:938fccd61fcc9372b042db5ca97eda1cddb8b7690f448164371d5cac2897b6e6 +size 32577 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bd22f9867bb7d06e70769bcea7b77d423779654 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183fd1b5369cc7330a2365c469fe89ebc716540ed926b713d21780a8239016dc +size 35035 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc4d2369618ec090d0912e51693a5c736b791286 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9488849da20654c175806cab9b1d88d454fd15ebaa04a69c023451890d05940 +size 29131 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24bc97888e3fa4d5ea1221b5fa46e3f7c33571b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010c1d45ee12855eb4373ca5c090924d75a1c340b6b50c0661b878809715329f +size 19732 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fb79e3874aa6000aa9c028b972c892ee883b9a6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16baa1cc5481c96934b8db851edf4f6d6c436a624255d8aacbb794ce22d74ef5 +size 21984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e43397d9dabd61a17a48cc14796e96a55de3ff2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df31ae006e445bdaa50344f638a4a3ad8928774537f2872416b3cd179483be0 +size 18408 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96320f1db63d6a6409b79dfd783bcbc7a3ccf48c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab9a4d6d990e88dce4b1106085aeb9ac6e6f10c2deea4c3d6ae55f126e7bc4e +size 55259 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22b79db1e96a4be4cfb768d9eae638a764e2aeb2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff29d3f6c1fa17d2c04ebef5cd64a6eed747d820bc555049de6ef6c4cfff981 +size 38070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff4073e668dd6a0f79ba14562d7ca024c4f973ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14a6725232a473166583afbe208cb961b86fe1a2676bf16cfc74becc274df184 +size 56150 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45e112a1d4959904ec539e55155b741167eef671 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5952ea019c56552306a78d96bbe8936ee8650c5cb8fd53d3b9ba4930232d4dce +size 25321 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f272f98d0c3f88e6827c6a10b61a2f30f9fb95b7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3595f24ab5138f2ab52c702391121111159ad4ad305dcc4148947ee1f62a9b8 +size 14589 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69af70741b961cee21920cd3eec5250b5189c572 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9993179586d11eaa90d85d6072bfcd3469cabdbbc0cb1b8f43a4f01b573ecc9 +size 44441 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..affafe62e20a19204d965a7c0f106a4b349ccc15 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc2512b069d563e55c8382c61487e301f3e395de87d28875967fb8883dece6d +size 49115 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29f67d3fe58864001eaed07fe3042b5f6985f68c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94bcdf46f0da423dcc0879b3f6f237ffed516488a4a8c751610fb9c7513ac621 +size 68271 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e4cd0aa58b36976be3c03f39b31575dd3efa2ed --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a6265d75cb8ef5fe8aa98066bd16257aea694dc3afa8efda4ef5291bd30a43 +size 32564 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3da110da3ebf1680b45c715d57fd8266bff4c6d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:886b7184ee443ce82a72c7bd796d5e3fcf7bf0cf224545d563c253f01d6de689 +size 39291 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20cd209fd983612b559434b2da945fd57c5a2ff1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b341543ecf215cb5fff38a2d5ffea2b2c78d2b24b59261c58e31ecfeba2ebd3 +size 66050 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_009/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2828c320946163e1b186480421312454b195f0f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f359a907f46bfd136a141fff0b99573b7cfe04d8465ddd0712b1cb8907c5199b +size 20966 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18b07b915c8f66204cff1f78bb8765a2102da43e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f4708a580c9cdfc3d0e5bca0dd545cfd2a1b4577b7d47b46a1803f2c15efa8 +size 17200 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83dd179534aaab7e1f589be4b4d50e36d14eec95 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25c462efea7ad5ffba8bd382c771b9f2a917611052557d4f10825e719a55a731 +size 73657 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e79b6e9e44bd20ee3302b718f9134404e03c84c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7824483675f4416a4c44e5ab2b7e28b1ba29a20df3eed57548d86e1b71dc1d86 +size 25456 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ed73068452f09d40b6a0a923f115f67d082ef2a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5542d8a6fd47039415feb13b1f2846ca13f8bf2fc423b2ac07aea6cf6aacc7ba +size 39555 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6fcc4b13d88cad42d74d2ee5f33185dbd18dc6af --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c7ccf7bc8c38acd2fa6613f328aa670ce2477e6262111fa76c0ba7022da3bd +size 53603 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e651905583526658e9aaaa24c4e314816b6002d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5e551bf5ac8c242748d08fef76f764c02686d7cd9ce8588140390a8d970368 +size 59132 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11498a43f79ffecc0d6ddfb4c702d7acbd616c4f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec97e7550b2bcc02663627dbabf0eae16ac8346f539a1c8d60bff4d55611ddd +size 53590 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40b4649f8ff819d3b2bb11e3c267120d005febc1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef431e04bcd33b2a61c0b87615814426ccfa6637833a18663a8717685562b42f +size 29938 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d6f0182d4d97ae4c739fbf1323dad240f937967 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aaccad70b868952876ca1f88bffdbd5ed96a667616783943c076333af7f6e86 +size 45469 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7930d12992a977da0b7673c66abdeb0ca142c0ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55069d6bc92dc297ea957a8de462c138fd4c1e5a19e51cd2b16e080d4b59496e +size 57120 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d0b8056f511d0bb36ca28391211f2a314c02165 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c7cb3636ead136b8943e383211f80cb4c00ca724cad2973bd3dd08d55cc645 +size 33518 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1590be8766e60898b24f75ae61a37418f8e9c900 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0da7eaae84f0134ecb73d8fb7d97f40af9d41acc24c342e3aad125ba7935a2 +size 35395 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af83ceb8e835caa19aa61c4c7a70750811180f38 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d61e4b4624bd09593cdb13146b4c3234a37302ab6a4032ac9e53b39bd1a67dcd +size 28284 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84ed824e5b469cadb1a75ccff92c1000b37f0b5c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79eeeff8eda443ea796021c1558d869349bcd9b3904d66b408212d2e503b8c9 +size 17274 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..567133a78279a1693bae8a1f4a87d758e3f77f6b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56319b4dcf66c21a726d60ec7f04a9ac395fd42a2d0f033b0dd5b3eb07afcd6e +size 20291 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2afe67b7e167ffd751dc4ae2ec472b62b15db65a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3034f31b4e8adbfa3dbb3298ed4f29d0fbe0bd7e566592b1a8cca19687f239 +size 11858 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..202dc7a763b892f9517b1df0882ce8c62019b354 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03fb56102ba62234faed432bc4c4c3abfd24ff9002a3650ec80b481d529aea69 +size 55384 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e03ee84fd491a7c85cc5c749851d2b3a0a4268ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:218889dcfe453fe61f9b4f93a58c532dba46dc8855f7c78f131efe64a87821a0 +size 37031 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b08265e1eeb2e1ca1ebbd6f688b622bce581ba4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47a2d3376f402efd30fe4b881dddc4b09ac41f8188da52a92851c1cac2e9f4fd +size 55813 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee584f48499f25d22c01c2aca8317608544b6c9b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01390e2a7a799e1ff4be1ca690e24d04272f4fa20684eb798b929117caa64df5 +size 24790 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb0f2d1602e7c176e61258262e987adbcc71c36d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39db1d8fd680b78730692fe78fc3e884dc113b0ff928d5f147a4f111b1eed541 +size 14727 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..521d45793d5ea311c1bdc8ac9ba3a7e13ec82c72 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8874d63d8d182a6daca357877dc005eb805f04d40139a364725f84d39f2cf4b8 +size 44713 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..026b9c4e339865127c3ddb1a29a2a744057cbc28 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d42f4914c2ff534f302b61214d4507fb0c5ac2d34d3cf229ee4d331a32db588 +size 70933 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4eff097ff80465788bbcb4d38b84498ce744a1e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:438cb3d0cb25e51cb8313c2827f7b640b883c13f195ff14fe70c18363188e458 +size 83103 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9627cd73acec1147524c15237787cc3c84afaf7b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c0bdfc608b945d217f98e217351854b134c3b48001164b147d0678e728cff3 +size 33691 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fa5e0420a584dec8baf86ee577da17c36b5c318 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95926b115a2ee9a2260ba61bac5940553af09e43275580331be09e9e03345d7e +size 39100 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c5210476c4243f9adf7a90015562db3fed7376f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47780a296ebe98aaa9b63701c0538f0139b40bdfb5002de104138a32a82e1465 +size 69578 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_012/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a74adb32c72bcfc7f94c984b23292ebac047fbe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0312a215fb367131efe55a2b9693c75fc67dbcd18d5d3730c8ebef7dae01965d +size 20975 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae718a4899ff31760c5b0af9ca4f420c07b2930e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc46dd1562a8bce6bcba8a4793eaaf11f30632778aef3550599cc9cb8c3399e1 +size 16527 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..930457e513fcb5f4334001a943675f090a0c2a57 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0734069e2f7b8411fb2d8646bd4269d52ff288cce6d68f44ec34d57bd22c12d6 +size 73282 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d39732f4f47f5ed18774332647470ba25bc834db --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc19a6a22ea60642c6ef946dba19dfd962abac10ce535d2b66fad6dc7be022d0 +size 25885 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ad26de23ce84e2962fec81b8b5d71e438f72e08 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ef94cebf892b7a09e020c92b0800982ceecd22f49ffc44cf753799decd7800 +size 39087 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27db8d3cb71e015b351d27c6358c10bf9d5cb3b0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ce14e4ebac675fa4b902f699fe7f7fc19fd4e8b9c7833666a07fbb0fa4e907 +size 58324 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..260833e7b910f0a9ce37d5d10783aa4ac15d9327 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46f892532c2c43a633170f59f5b027b89bfd95ff587f0f4a8005c5db5ce88b0 +size 58697 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c683114bdfba1b379a4ba2e4d04eea226092f0fc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba286588124467fe04bf762d303bbd72f1643c89f7449488770877d6facb597c +size 45632 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..444fc4843ae6cd0191412206b78db82e6779f21f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:158f91ee4a4fd1ba92915204205e5805ed4d32f47823ee7e333c550496a6ab98 +size 30643 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5c5f16536ed1c33bf45fbdd5b11728f2c152b8b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb290a8101ca87e8d59d5e8308fafef98a6b89b3087edfa90ada21308effd94 +size 43524 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c023f95c0a7556d43914c5542d7bac898f1052b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a8df0266f91c0e9e2772de808e8d35e271f776cb2bdd4dcab4ef76aa84b3a3 +size 59533 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29f4133842cbcf830c45d9a6d1c813063e4925b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a65171866ce614f15ec7e305cbc4e6e7cb24885a9e8ea2a43ccdd95661d778 +size 32861 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9881a9e7a4110aafd4724e93481eecbc3187b5b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d2b9a225c86ab73260b806ae46e364782e56dc65be2d14d140f8f95a9e03da7 +size 36740 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81f7c942550abdfa8476ce5d406907a106e1a9e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7368a291eab7a7ae786db5e0d1c04c0122eecc126eb2e4b9a15fbece48f4771f +size 26771 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3320fccebdcbe92c3a505aa83d31db9fbb77044 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6b7488d386a41a428fed5df13501376e9af17ed2ac29246452b642d0908497 +size 17254 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be201c41a70ee8bd4bc5782ba6e05196fb5c359e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f360eb6d077509dd89e531e6d40cf7d0c86c0dede34b02837fd914fa6559d7da +size 19800 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1dca28057613fa2fabf6422a9f9ae665a2ef6cac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8be5ea5bb89827b2ec27a4b4c2acd360c237bf4c7d392c16ce648fffd487c6f +size 13854 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d39177f5a5a8671ca8ba56bbe9644d64b25f5f42 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a431eb198ceb404a8bdaa04af1fd6ea72e4df1cb957c17624d0039e4f6d4850 +size 39212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74dbcffa4bd9f7069449bf67551ed456902cdbc2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be1d229a65dda300192b7aaab1168c1336536541035c39888846c3e4623c820c +size 38575 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c91ccf01ff4e43fd47eeeedc02a37fd6f56cf7fe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8d75c1da0bbdfb11b0119a9a19d5cc615ff84a687142e99fd1205fd31a9a3af +size 55605 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cbc0c040892f171c87964601e3e7efe78486d9b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9113263c41ea01a1a0ed3e0c7b5fce257f243c368f0decb3f9050b73b1969b12 +size 25166 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76dea510bf8b3a40be7456caed66f1b8454ec220 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4cbc6ad46f6ddfdde8daed570d5d204bff2cbfc73bd0195c0c3512e03c0fad7 +size 14589 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ae8a1bc102fa628fcc2fe8ce3fc321ffe56fc51 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b05498c7b5e0a255ffb8a1136a330e39a6bad5febd191520903f297ac67be88 +size 44752 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..422e2dcb920ebdcb0c37ab4913a7d9370245eaff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff088b7790dda4ecb93ab27e89f0d67b5bdf45b6ed6ebd757bea319c5c4f5317 +size 54578 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e225a1cadd7640422e9baf09e26b5717491f4c29 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734d2cbcfc90956ff597ce2bca355249541e91ff5951eaf52bb1e9f98bdd31f4 +size 77454 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17bef89cf9ad31f460a73eccea00be1bf1428f47 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94411087736db522374679db74730eefe933a4a627e7f013b1786811c6b5da0c +size 33583 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..524cdb0f5754dab437a775283211972cde149163 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e5d3ec715cfb12e9a192c5cf0b1382076ab2ec24e19f0748ec7e6852cb3f53 +size 39417 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa4f6987964f81a0977fe39b297b1d25957e5cbe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9510a42ba3cea582442c8bdfab74fa4078d2249f8759e10140b04c11e34e2d8f +size 71129 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_015/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39f4e388062f25864bda354caa1291b304254918 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88fde6fd1b4969e207e96380dffb86cf1bcfb9aa022608cbcad397ed538548d +size 20961 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cafef1f2612b3b46ce83f49fccc26b028d9f1c97 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45086788d6b3c91cd31171e55af80087077e7845418e5b68e960026884953ca7 +size 17064 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69679954a742ff02ad24323b8be97b3ca4609e83 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dee5f8a215b32a08d5a511ad40232baf14d251b417676d1fdcc14c3d65ff2cf +size 73754 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d89e5d4986560b8b3d2e94f3000d46ed437d4bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab052f7d524439d482db74f8ebf4d8f4f3ccb02ca3cd4248d29551aebbb128a8 +size 26148 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a3cf570d65715a8292b2a94142c6a397d7607ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dfb50799df62296954474a3e4bfbaecafc1be8f7250cd6310757f912cf598c3 +size 42107 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f730f7522e20e5cd0eafc136dc2c35314715ffa8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f3150ed74419e5047be3a3d29f3e6151bbbeffb9bdb5e90bdd032f42397ba8 +size 63218 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2693b136216457e165c1335ca20e44fd98eadab2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6239640dd1014f81dea946868b69291f135384bc5e4d9205e3891e863c79ad +size 62052 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4e8048326ba754d6adb6a3be34f858ff7607625 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe8459cf2f1c37e21ca12a18fa0b84caba3bd75e392ef5fe296f228dd36b4d9 +size 58504 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b32137ed5c7252bb659cf40e742d4af44d15be1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5445acf5cd2aa23cf59e0e0378c57ca25f04835238d3cd2369496ab091d0a906 +size 28778 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c0d198b0f9ac11e54a06b8376fea45209c47390 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20f77ed4376088f3d9bbdfbe9250dfb8ec76b010d87bfd45802394b9bd0c7967 +size 43030 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec1ee24e11980197235bd8d1cd981f06bce029c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc3abc14a8a103f299b09f110c4e0e1dfbfc58d24827c01c74155ecd5db980b1 +size 59749 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9207fd8422c423377b17fe11916cc88224f9ab40 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ab9046728a301e2011dbb02f0eca2051fc2a8c0286c55d78a2db2fbb4f228e +size 32851 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8e73047b63b5179a9d97d3830c0a338c57b717d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b9e42b255c94786d0a060dccf7055766d2fc2d5b269565eae7e7bcb63b97872 +size 36885 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25ebe70009fe997780a25a320926d584ebeebafb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1c592c89626f16c8d55db65e3cd993879fc8e9f62d63d4b6c945683307bf002 +size 27222 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..836062a7334a365c5a34ad7b7c547fd98a0df318 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8aa6725151cf45b10eda2de74702d0024365210da0e8a222715bb8ddb82fa2 +size 18637 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a167e63fc2d5a78ee6ed5a78f51cb8330f2dcc1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f0dba6a458eed0332d03e89c9cdf5a444e44582271bcfbf222d8ed4a94134b +size 20648 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f42a103470fd2a16040430541b04fbe008d5b639 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4415f5ab503ee762c0b81f78a6db04cafdd1f6ff1a7490c46ab98afa9fc5597f +size 14191 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32a98c7f19574735735c898c525469a3c525b874 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb050bd826d25547caa3b4604a459001211f16bbdd9323b2d432de04fde1a62 +size 43917 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4459992e60f0565753719de748ed94900fe7bc0b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e1b97d54deddbf835e275c1010f65661111d4107b8fc2e40ddb2554b084651 +size 39123 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed866e15a58c796b16763c95fba251d36f5599e0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7292a9073f47e0ff9ada9491eb717876d4468bd6b3da4a941a9aceaed50a540 +size 55392 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32be53ec5dd863d285ccca21d56cec65389e7687 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd3897f5fb933fba0decb633f94c4a7bc35774c0ae163f4aeee38240779f63e +size 26474 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bed6aa48e6ba1c6718ecbea5cb62b2b049314ef5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0a17b7d72e3de3f19eea50fa6431a29b4764ee6d651b98e218cbd5751d7e26 +size 14467 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a327e52d427ea1e456c9fc7bb3fed2c5cbab9d89 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098836691279993e953cbd911c90cb20b620ba6a9c7a4b7263025f7be85e77f7 +size 44766 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c427f80da50f2eee08353ef9674e96643967428 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f9fc37faf0df9ec6bfe9d25c031727f037ac73514a7af5e8b0409b3db74f8b +size 48599 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b89f2f31524ec4bf610edb7ffa4ba6d188f8d7a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f85a418f67ee80b2dc254770ef63a19dd7bea225c168383f640e3349b792a8c +size 75911 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eec25d8d9696dd55b18ec6a5c0b5c34b83b395ea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d62f72ef6596d49be092016ea17f7214901d281b3c9d3524adf92eb370eb761 +size 33070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5419039ca616fe6849894a09029c8a7c635c7246 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31269b64bf1ad20208cda898a89d80192021c891007ffa4cc1899aaa98c5fea3 +size 38743 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ce3c74d3cf06e7dd17fe2742e1ff9a9c1993e90 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd23f6f7dc2a06ac3b7b35a2a44b147ce3a57d750bac1cce3fe8dc579d1e789e +size 69019 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_018/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3321a1a119ba7a179a7c8b5e332d90bb23d0cbff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6dffc82821ffd3713dfb3a107aed8d20dade7da01d3fe00098c82a9fc4d3b4 +size 21002 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..556e370c2a309f315ae2aa886428e4556e3aee6b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07363be90740732712a2249616028dbffd4965c40824e3b844a04645dfc5e137 +size 16664 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3ae29579d31d4bc13edccd4dfb27800c57ab5f5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd9035b05096b11cfbaac21fd96327ba1be9e076e3659de546a06527c5e3a6b +size 73814 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b48c0e2ba78f71fecbdc104761c76cd8eb4bb5f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec238d62f8971491245cebfd7b33c007be6a73c71d5782c1acbe6a722af640e +size 26738 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27b54e1a4ac63d5790a798e661ae1129ad639602 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e59bc2f01d1cf4727a475eb18575774094aa3dfdaec42b9cb47519bdd2ebd62 +size 40123 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cce2783f0d6343cbd6dccdc27f6d78e604b9a52 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9efefbf2a7ae753e621475e6543e77203b20f01d530d36c15ed76e872413ed6e +size 76643 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..314ebbf857d2189543419d7cc2d01f6eb063bfbb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40012a7c6050d0214dce48da8010a23636c03f58c1df3cb1518afe9343da4c2f +size 60589 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5b660943fda20bf1cc48863481456971da4ecee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94f4f3e6fbbae4190481d543f9a1253a7df1fdb519d45f241583048a5b0a872 +size 59365 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..616af674e672e79a311facd9df72d8a2af9f9e2f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a90cc4954b9fbe00d59924bd5f3f3080bae021619907741a9b0cb626f299257 +size 33474 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91af05b60ba51687828122175c47a8d1f50e798d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05fbaa57b61b5403207aec2119e1da6b61958ea65f6936486786238d80b24bb +size 47454 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb63a06650be06e2c1007ce2a53c0dc16f468ec5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fec1f6c0670d190f59b181a1fb4e82997f578b9cbfcde2a34feba1a4f415ee08 +size 60839 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a20d00e69a36925b57b5628a4d75a93d269353a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb7dcd201fb13480777670b6bd38b663df511f45a458bad962299816ae1adba +size 33375 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fea1ebddb82b5566403658d6a173404e617827ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a23b6621df4549923793cd66945b16f1a765ca3eb4177ffc9a5cfbf13e0663c0 +size 38221 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0634e05c8a01446b56b4a515fa929e1192cf82fc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f22f1aad4d350ef9981b8bd500e29b72baa45f11a30c984cd10f96c00ad1e3 +size 28097 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23077bfef61a92ecd48b0a09cdb442209eb5af4f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312bb452e8bee01176a8899a46f32142b621323da3ccc14677878e2d7a6318a5 +size 18171 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb82eca27326fa08e7e29f6da7f37d4f4d72659a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3f676147ffa05d8e967db9e6f0d0bd74eaf1040f781e63877b1a8ef406c48c +size 21145 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4eb3db9d79766441730f96e0bbe096ec5672fb4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d471ddc797697d82cd9a7ffcdb7872ee6b7e645834bfef82823adfbf77a53251 +size 13676 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7f0cd39732ece939f16cdde0c5b7c72d17278b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:749071b97c351afcf09fddc7a4f7778f3721fd04fa4eae5b768e353f2935f410 +size 37136 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0baedda4812c0bea545baa840c7d06a3f381c57 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2629dd1050620045be1bd437de722824288e59a63f61f38c2c41a46838afe81 +size 39619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f69eb4e79b921bb26390f9a3fd0163aed3e494a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91876683f8605ae3153db303e0255677ddef8f046033af4cee556a302f8bb70 +size 54681 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3784469a7e079876595ff31364c8643ba5004d9c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40106e9ac2ee3138a8c9348ad6b2cdf3abf0209bb5b7c2de8836a9e0b4f3d93 +size 26429 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27e5397a617a6990865dd036593f8dee99fd0007 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b98e641273df997665da3f6451cd4fafdf272a1c204212997c7a47ded80118 +size 14723 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fea95c653af7245ed3c90c084d8cda382560685c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b9cb7b720483b6ed2030192846c12d0f86d0429373791e47e22481404a9bfa2 +size 44180 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73ca74167c93f38cdd3a22fea4b97303ec0fc14a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564a8cec85329e9596575f180d88f7b9ee42f826aab8a9114b4fa3a9c11f7642 +size 51930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56167943e558defb4a2df300acc80e10314d87aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba373971e306a8f671f0b1727cd7f27e6eb19ecf2b30cab9fa155095f012ce15 +size 79490 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0015c804d3d5d825eb21be286a3d2c651de7299 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed282ad952022f9aad39afa7e6b254216c5977e1ed84b17e23859f650f22227f +size 34299 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31e345f914b1c7e8f87ce551820c40791ef4c57f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087dc92e7bd5cec79dcc8b792c2bf1611e9df396dfff5108db47888ea23bf99a +size 38784 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d891592786921a73cb6d63f2bd8e98cf1fb59767 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b65c24e490e88d49d7d786c233282b1ce5b755714cc46120527ff1c22ede5a44 +size 75964 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_021/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5202418297f81e86a2c61c927fd30a14d3ef5019 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8483fd03ff3c77e9ede13f08c6d1b5c070f708ff099ff7650e1b50b7a400a44 +size 20969 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62109e6179997bb3eebe465cd629f8c9154bf10c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5e2c187b83d22277a00a921f20fdae5207ac2e42bcc45ac2780fdb7080184b +size 17677 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..364a7df2b5abd850d8e0bd98935759d8b0576188 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d350106321cab69098e8ac650de5f3ac74d948a2e121229a8a6ecc632336eed +size 73265 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb105c0c5c264baee9b7463bddafa62efe7a99e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0f266ed17b678cd6c027eebef1548ecfb2581179fb1d67619cf1510f6b599e6 +size 26327 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a82f984cddefc1e2cd51c8e34e281eb01e57b8b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9b8ad755ba2ab4f6f6cdebec1ca09d21e490385cfe6b6ac5b96acd5bb27e71 +size 39999 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f07a0fb2f1eed7612b50d61dbd4fc26d94fdd378 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5339a677cfaa505ec304f46250e9f8b9833d3545b3f819a3a77afa6b710bce7 +size 68093 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79a0af969f9c0fad73594dfaa778f8f67194f79a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8c0cc742b688d932c0ca211aa17fff5097b3a09f917e6b0536c5233d0ec311 +size 60394 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d767a89e1757ee29f71bf1df783dbe87440b5710 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f23a0c3d5f17b2e365fd11db799d02408039ee628d20e779ef4c0f3e6ccbb5e +size 57735 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2bc2f869a860afe647269609882504962926e7c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a6a5bb40d1402912a7e6bb356e2036a34029363b0f2033f7449564f03aed226 +size 32751 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91a5caca55057749458b3cdf1fe9ff44e3e1c38f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f90d71dcd702a04a4518f30ccea8226e4629f1ecbceefb3d5fba0e68599e926 +size 43612 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9442c11e485dbe6e0098cdba8d75e0f8a16b796b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba15ec419715b4c9f970d0d5522d9733c102b83683df5df91176c14f32b596c +size 55852 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03f089321d3d5a8bc6b382b39aa9be9440f0655d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07fe2220200fac001574fc3466f3b38d3f9e089c596e107ad6d6259514ce617 +size 33163 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0218c1c9a92a0ea2feb0d0e9cc329d6d51be72dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e83f47298ad664eb0bb3e359bdb1bf9a31f65f923d66fe16461bcbbec51fc6 +size 37300 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..876e0feadbff81d8213be51bb88bf9506fa85be5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59fedfc6f660bcf060b04503a35c3acb10aed01d2cb2cc9af7d700b4342a02f5 +size 31690 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2930a711e890921973ac9d2078c41e50f8ddc244 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f79b78ca593c01c7da93539c1dee16e471caa9b9fb390ba50d84442ad4cf72b +size 18334 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1efe5fcc8a4d62734ef76397115c19b8972c5ee2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f908a62d155a4631e79359d85abe4c2edac430b75506000faaec4e261fa6ec +size 20870 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbd0ffb3e5159760c39ab4faff7cc048148346ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1501bd61f4573363812d6074750566d930b2f141a612489fb788efc7427beabc +size 12815 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3147b9f011003c0b6bf0b0b2d1fcf7b3b199b12b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61ff8c2cb0d150e92ee1ecca88c52b222dbcdb7cc4fa80c1fa53797f0d541e7 +size 33737 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c067bc465c1bd352f43287a64b7606dd2e4e9736 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ee2266ab1dd88df9b06b026a698890754dede490e979d68ba3b6d7697eb1f5 +size 39815 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73429b7ba9e100fe890fa4d7f8123b5a4f3dc2d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed66da8a0fa8ddcc4975b6531832d6ac99d6856b77b4ead9de9605fc2ba63c4 +size 54814 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10c20b76566a4335557fc5a4c9ef7541b7761f9b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b517cd49915bc62a3baa387b437c101eda20e400873e94345d9211354491e864 +size 26205 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..638b28ae11c17eef2fe75efe2a7ae1a642065489 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbbea37e9496cd81f8170dff9d55fdee56d8130319fee36e196bb5e282e11e7a +size 14500 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c03c0e205a6aff2f63e05dda758fba01ce13635 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2426cec754257e6029b308d0c7b801b65f2f32433350bf8c5f1e8404c2eaf144 +size 44827 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32666fe53633ef40089cfb87849b724d041f8249 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0317184d6b2392991a646ef22ff5fea9abf96ce53e1eeaf157d2096568316385 +size 49289 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..078bb23aaefce95e7304653b203a16c5128bf81c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22bb8cf83249b35575ba648d968a1d047770a2a37b81d3dae3bca00e2e8bb50 +size 67900 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12769ced12d8f32826021e8160a2ed4d2ef1c5e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9924497a47108508163c82d23e301247660f85cf0470634cfc6ff6c065cc7349 +size 34212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f05fe1f3216d63a2c005c898ef1dd8fb8b0c7b30 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fb089eebfa9c48fb48184dc5365f5dd0ec1e26deb57551b996608006f46234 +size 38652 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3859ecc7ecd4f28dbc94065173f441c10b68a2ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f89bd83cac0dac5e578eb8131c4f04ee8cd16ed1715ac4662ac419cf774ad520 +size 75390 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_024/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a67ed98fdf658fb3dd66c7686c62d28ac9433991 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d32e28dacf14c82f74d0513949a9c9caa84e6c420f1f0325502753b70626681 +size 20962 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06faa6a27f6a81fb1774b11a913dc182c89e7e00 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f48c5374e30fc3ecad9594f2a492384796b9eef0ef8c19146b1fe4ab562e57a +size 14954 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0feeeb30ec6365d5c6bbaafd6e7fd077ea5f7f6d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89d15cc54b5443eeabbf5b1d10b5bcd2f9a34cc12c140a61cae0552b5ec3458 +size 73780 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb2af0c1ad710ce56963a9a0b827feda5873dd23 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1705605db8fb05b51b2be1befbc3fa6cdde6677b02f52bf7a71ddd993f95b78c +size 26270 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..384dce7d3b4543b9929300607e03b51c33ae4ff7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea2b52d6ee592a3d671224756a82be0e079587320ef8e2a52dd8af1ea31f754 +size 42453 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c31347ed80b354b022627e1386afbb412b7aea25 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f58b39dadcb60a1334d3c0585251c8daa6838e8990d7d760963f8220aad974d2 +size 67187 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..820501b33256b8009e2506ee17b11fa77725ed82 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6916bb12f90f98f27b49545e09cd04a65bbac98c48d4cf63da058ea18eee6480 +size 60460 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d78aa15f1e24b3e3a7f52787f2f9f23c16f3b97f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3280e51c1d8cf095e7446f5be00b88f4a99efbbda07372e7163c0c288abec422 +size 64011 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a87d22e60016bb141eb41873a25ccb4ca7900e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0cf21a7c435fb182b117c3f6838cad440eedecb00fc19781e15d0ca0c06b061 +size 35085 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37b433ae622eaa8fca5bfbb59347d05571ec455f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd870b4978668490b30b28e939af5d8a852e8f2ff33b12495e45b0d7edd8f62 +size 41581 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..446da59f4bd4eec167ad722b24bf991d8c411080 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad1860146670fad6010821517b9166553fcb26d72d948ead2237f3224399b93b +size 55674 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12b351ba7112e56acc894b34720d593eb53dfb85 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c77c6150ddc33a3a094983b0845101831ae3df71f51beaf57a26e16ad960322 +size 33154 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75bf767304823eac476b5180a29617d9a7f4365d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86215fde1a058833bf7ab1d573c1d4c5297cb50365060662b4fe69fb6b0cab0e +size 37281 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68fcf93e76c378469a1d06bd2fe7080c56781b37 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba01b0f293751c67c14da29b541d764b6b250199046bbc37c9da7307e9a7d7d +size 33311 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e18f2b8d937793a721becb03b40b94060ab56810 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b1b47a7d23f37b74ed6db385e3b7415a96f1eaadc293a62147d4071ff3c856 +size 18246 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37d25349ecfc213fde18c70321e68246e9d760e6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f633b418b12383501076163812c556bd5064166135b5859484b026a20a8a8e0 +size 20687 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b12b670b0a75f8143e8f7ae09cc410d51a776451 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a2e95ada6907a753278d659e4be16a7836b57a069358e9763ee62110b6342b +size 13277 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58c93675f251764b3bc4385ea2ce2de525e2a8cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3e3d71057ee6c69bd7af8aca0947375b7bf83573cd0ad957cbd7e19ee9fb1b +size 34605 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8615d845fb24281c64da82b16034cb23a57fd165 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08ca57b5a0f313c99c8c48bedb2e83982e5b98b0b13addcf58acd258286ca826 +size 40402 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55691198b48c36fe8584e621afec5d2ea83fd961 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5a47daa145b3deebbcafec8a757685712644e0da6b18ab122fc80d25067d9a +size 54721 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f13e760a2a92d07c06f6a7a8604407ea5c1ceccc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a8fc17e53efa017c1ced59c7129aea19e5170349e65e2df571f4e8151f63c6 +size 27269 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a93e05166b5e605bebec7341f5ae0afc73747f0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d08d1504774a9089872907aeafc43a86bb5f41b719c8e1786c2aea8e53e403a6 +size 14733 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea3e446c3b64b82aefd2bc81ee401ffe393d9d69 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee9a6451d6dee18f1e45a8bb8208c6d6b616767bb62f59ac67c3726eef43e78 +size 44657 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..349daa1af52803d33749601a7f69b0311f2bab7a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae728d6d926d977c7bf7561f97aa6f22ba67cbd0235bf14a3bad7d67671b968 +size 54702 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e718e6bf1468c96b19b007519b5d78524f242df2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e24dc9d50bf3965dac6cb15717c879c3b35c64e8c32a15eaa09ad342bbf091e +size 76973 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f68dcaa26a4e818e95a21667fbcbd8ec534c34b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ee2028de01d6221b40e4459aa6e4bd8ee14a244b50fd15882cc6a3275ef86a +size 34537 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff0e22970b6c049954b9e9b1054c59d16b209366 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b41cb9916a56b2fc050542520d9b15a0fc1df0151639e8fced15aaca2bae703 +size 38579 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a0f6a9855bda4582ecba9a19ad6c84a3807393e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745ad9c30720a45155e459b1398044a3e4054ba13a8bd19d8df8a2988ab9fee9 +size 76740 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_027/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e09c0fa227272cae2ab14e4ca5505860c22282fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d69269a7f8dd34c7ddf89678dc1d02788de416d20680db290229372ccb01f380 +size 20980 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8340152febf0be01e85a4012e0088cdc5bf71fdf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7501485adb02f91156d59628d8be7a1aa9dc953391e8e35d8ff21dbfd84f756f +size 16868 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21ae694813d28538bdf80a18a1207f2347111816 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abdf30f9045717fb0d5c36ffd4040154d30d1f9c42660ab6be27f36598434882 +size 74520 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..140ec955f72dbd2abf52e9537dc0461928394449 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c89416e9f6c4d22320c03716ed3b0aeccfed33656b591bbc8da3f2cf8cdc0b2 +size 26433 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29102539ad244a16e07f0271d3c7518d9946de53 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:733acc3482c31f31c889f34bfe9352ad95bd0885f227ea65895f2f45451e21b0 +size 40126 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63edc68b564e6e7ecbe90647ec501ea42d803cb5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45b7be1354839053773a864a53fc93e829e70c58d67f336c8db001a1cac90738 +size 76296 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf0b5f773ca88545c530dcc1d2449136236290f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f8775401fd507768e35e58de4e1629f58328b3972ac8e5aea142f469ce52c4 +size 62121 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef55541aa29b931b3239d35657e2c1677405b8b2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a94c633ada639fcbb6af42d4ef284a5f5221ea71ae3ee6e5c6c2fc3b28b07fd9 +size 60202 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d66244c78df0fcf29d35af7d6611d65445edd0b5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee5431c90b363d510dae96969de2ed6a51cfb8ddba6ca2840441228a324df4f +size 33214 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c433bb35db98e0fde7ca070c1814f59590162cea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735b661d1c8dd015fafd978f29351fa4b0752788c6928682cdba209213831db9 +size 45609 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4877ee3550a6752b84c19b7c79abbe94c194e35 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a3da20019284b3276a77157da2f7e43e42a8316fda5321cd9ac44c28d7bf79 +size 60481 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4eedf5ba1b708b05c2748617f95182d7717080ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff72691c9bbbaaaface36e8d682c035ff351540f4cdd4549b9c56efb30aecb1b +size 33462 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a60e4c2f09c9bfad87f4786e6b3bf2a26c9d375f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a725c656620782a60609bfc70ea23f86650fa05a6fdb2199f14f10487f44a38 +size 36788 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82a56a5f6de4dced2016054384a0bd9db6d2799a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce36e267e6d5644d01b88e4ef8a78145fc2b374b06f47bfded168dc76ccf21b0 +size 28920 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5b491ef51febf215bde340f226c33f3dc1c950e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec88d3bce1c32e07ced37b8dc127eb5b154e4cab5ac48dceb7c99156f96943a7 +size 18012 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0132e909612e06bfc2bd97d250d8341576039dde --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:749d07a6c274bc941346a157f01aec57db4aae0a85331a0eae77db810ee129f5 +size 20831 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..393c8844ed942e73f237de6572d8bfced52a6855 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5577d6537190193a9012b9cd4a36fb8d9e57bb12afb69f3656c31f04be12dd2 +size 13417 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c37255afd838c37c1f84470432a7edeb8f849ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7ab0667ceaa9b7919998b7ad413b3a6bee8ff06f08344386cc62b0bda7cfc1 +size 34860 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b598aead78fdbf2f21360fb2c7884857b1b17188 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cab4ff01b201b65339e3f7158143335286f810c4a6e06d4b75aa146e493fbd5 +size 39234 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ac320eeb10eb3c9854145aa99a51d52c8c0cc68 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016c61a301e3b5cd55e4d1fd4c96eaa18fc33afdeb727f4053b6e7822f444ef9 +size 55065 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2805cede0170a4c10ac64deab5c2218af5f5c50b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e2a769ea487ecf9d718897a5e798c662463e7ec62e7ce3b64c74f6c984680a +size 26588 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2526b92a8ead73064d5c3636873f110a50e8fcd2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1798144fe942c1b440ebbe1122db6ff4bbfda2581bc6aca9ff80ce9122f066d0 +size 14773 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e044e7970b62ca5f521034259dd8fd8c1a6b261 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2b7a3beba38eccb854cd8f1c107ea0be587ce8921906de54d8de32f45ce4bb +size 44683 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d618dade6e52dddbfcfaae7ee148f00d4d791a8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab0923ed324e12a9db24eb5c722401e68da332118b45a1e8836747ac12fc44ea +size 51338 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d97a22dd53832c5d3e0934e602f6d9b97810f26b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e1feffa16c550435ccb065ab881f63c348c4545c5ee38ef3a84aab51b8bc93 +size 73269 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a76c1a248664bcb105e0d5e791522f5253695edb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a7fdfa7fb02bcfb663d5ce28358c57a329f46faf95dc12ee3e77cc911f0dc7 +size 34395 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44c7b7bea61ae6ae97a58e1543af18ee9f15934c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079e53fcc6bfb9073170ac2871ba1ec2a38d285cd46d8c3d5336e8f0bec1fb44 +size 39170 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be5650d260e2db56846396caf54ac10ab5bb2914 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aada56e8ac9cdb37059530f8a5176ad242c0c2a561cbd408a7b92ef40fe34c1b +size 75469 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_030/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84a09dd0ab087f11eabffd6ed00e7d22449d9edf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc656aa9351c93418f3ffc6e04242372eae15b10e6f9339387cece18cf2fed4b +size 20985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38c1aea6b0f4302bd110607088cdc0ca9d94df71 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799dcdbb2b882070e00f01b823f59248199e2a990fde69547877af1d9e40c2d8 +size 15954 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d286d763f428494e95b7fe92536ba2d1d8cbfff1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec60ccfa5d7223f4c2a61ae3bb3bc3b51094a4914a1a07bc72a6cb17cdb517d +size 74047 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1d24e674936727db8f7c7f24250b00b093b2cf3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1155ae60200491c68559c000c6309f16e67a250df5ab28428c8135d5e4f8bf +size 26578 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4e4b2959e9ad2985e2850906b71bd5e0eb5e6a8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc4b00e377f482a45c9450b17fe7f35ddc6e3f935a56618b07325be5c8471ff +size 42509 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae8f4a903e55e80446076a43aa37fc5653c8343b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8462a55f58cfc4d88f85393085d3326c73f20913a64f19b9b02dc143be5f891 +size 73759 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5eb27f3770c0f253003f28bd1eea7bda09d8155f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd8b1e71173c1a7da4399a9824325cfb7b6e1c5ad84274c567b2209c695e1204 +size 59743 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..715b800d251b3273d225a33c6143c45dde9b1d6d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5837f8608d996ac216ebfa137bac8e2ea068adfc6dd4350a49a88c1b555942e8 +size 61277 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d8f7e630299eb71ca1be0407012562e94cc66a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3e35d4bc9bc8ecc37fa0bf2817734f885782828f6477f3b8da9d51843a796c1 +size 31970 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40347e1eddd2f06b05ee83452c13520f058d766e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:017ada283250e31d0eaedcb0630a22fa47e1b127a50b8ee204753756bc571ad9 +size 45225 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e531858fc9f934cde82606d7368530b6007d8995 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b51bfe6aefbc71ea51bcd6977bdfedbd2f9920dfb0e9fea01c6ecc7bb6d7ee4a +size 59464 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d57f7819164729788376a33e7419fe97eeff1ba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac2362f1cfade7697c6ba9ff300645577efcba83b52ec96c3519086729d76127 +size 33573 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55231a3895a03e569a6054476d3ff78e465bb0aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b5d34e3401852bd7e65819a7224cdac6924c0c2a8baf12d86af15cb973af27 +size 37375 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45736d5ee931fc29592ca7e5765125c2c8245e44 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1714704187608e7e51a42fc2df8d8af989c521fded27905cb99b7f0a5152b344 +size 28597 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32a644ff4f5138b1c0970d472a9767474331b46b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c6ff4e5f4b4ba4b1c7062e2004c088dd2e8849342fec813d0639fe4b6e8cee +size 18905 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ded6cbd8a0a161e96c5a3b6ba1dd5693cc7f605e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9cda1a4a501da6fb9875716bbcd3274590fe91d83c05dfd619be2eaf2b5e1e4 +size 20940 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..726c032ed76a9a78b860d3204d20428599f9f76c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59663c1594643d4ee25eb7a10fc653866687b27d9ad885c07eb52deed282a65d +size 13578 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddad3300fd699cfd554748b419f3e92f590ac6dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5c4aa4eb10e8674b23e92ce8717c27244bc6ba509360592fbc90049529c1d2 +size 33096 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c35b774a35a2b2ed55983395c307bfe965f6366 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf98b497009d5370b97387c4b5d73f24337baedee8ff1654b37910c49797788 +size 41503 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c02ef6dbceda92c0c5dc149faeecf6d5a5721fae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa93ff11a973b82ce2371a11c097f0e493ce512290d8d86e325606de4d09d35 +size 54947 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe6969355686beee0639adb2125370c345aeecc4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7e249d5d473ee842b1412e8a6fb9c9c31b46b91f2f3caa8999fe6b657d1ef2 +size 26244 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c55ed090fcdeb437aa297a2306093317b70d00a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52139d58f8155e3a462d53169c8b3d9af0cb0b9be5d38ea8d2d11a86f246d45 +size 14650 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00de2626797e7e77cc8016fc164ffeec2b181dce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9486e51017f29ec1b055b14b7d4209dc2199fc5d85a151e8d4241859052e64b6 +size 45104 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4387be419b12dc3913b8917314652c3ecad27fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c764f46c01d3df8897512fb86a01f8862c7c7834fdca7c9193f288153015daa +size 49793 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60fafc471b50e069bae6c390602dfcf973ef931b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eae8cdaa0a3cf5c9453e2dbff88c35fe1661fcdb1ec1c2b6be22a4f1a10f598 +size 65276 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8afe247f1ee795b37f719a957fa7ae95c1678a2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:773d5ebe06efd633c6e2b5115232a3068156978991fe7f1e70bdd1fc5491ac1a +size 34689 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25ffeddad894917feb07bba5fea93fc02edcd858 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b917b4e0a68b1d6c451a066a7380a2dd9deca86efa88a2c919a48a6fe7ce07 +size 39069 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50837e477d2f83105c8240bee794b64cfb64ffdb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc971b9b7fff55c25611283eb378d2fc1136c42013af550c4eda9aa43dfd19e7 +size 75453 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_033/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ee40d38928aa84e2008139335fc837c8c45e015 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71843949da4d5f2c917f0ff4f0e332b320ec25c28a59b5cadd5135118ed20125 +size 20974 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a32b83e6cfd26abdc3868ed343fa6f9773863bc6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa0afca7a9e06b7336058aa28c6448267ad8730f083eea5d6cfb1f5b3bece27 +size 15956 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5a1dea48b51159b6722de703cf772600efe4a41 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b7f3dd69e0c207ab42bc5c698f697c2ca13b0f1a0b921cc9330347c2999ac7 +size 74718 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3c9d54c806460033b3ecc1a3a09aa52c5f64bee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:550e0b840ffeb9da7f98ec3829f5b35953f313a2d6266814c9d1dc1ad2c61776 +size 26575 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58c71b41234415e3a2b3db8341a296b9bc858c98 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1df2b1221ef5623678922bb9ec8586b539249c7a185c764a6742ddaadc7627 +size 43301 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3373209939b7b28f22552ce5b01e49d90e9b807c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b61df16c3686a944ce9d87df74853d1d59d6f363abd57d9d9587dab86035e6 +size 66725 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..459a5b055df17e21b32bc3a36f959b73af8e3705 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53cf3d0e36f2821c7b7235d81eaf405849334ce69c18f1d995592f9d19cd69e5 +size 62034 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9628776f032951725804d89771a64246f7ff1ab7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb76d53a90f33a2380fa66c8574c1717c676412b8730b076c9ec762e7cdf2bde +size 65301 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb804e00617e6dd3e03de72a06bfb216364d2184 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8684f47fff6f6da419e7f7ad2c5331048ba842174e875babb5cdccbe85f2f31 +size 33588 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bade314688ebebccb642fa983277232baf39916b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8d016f07686d4dfd509b506890c9c0aa8dc022dc8561ab709644d71d942c35 +size 43802 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..334e81c6004a84ea24437d92b39f317b6f64e9e0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d503dabdf16ac6173dbb18a15fa757a7efa5af1de1cd15a6f5bbc5b67e6da6 +size 55522 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85d183272c791ef0e45d2521bc39b88a0be481c4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461a01944d38b97b9bf209fd81aa79ebc902e69834843774bc98543a4a61506c +size 33201 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a22420883d15bc79de6963ff53eda5bf3c3652fe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea56eaeaf066f9219c8aa1c8ecc8f138b2638b5ad70ea5961ddd4cf08ec9e6c +size 37249 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcf4e95cfa8bad742a08b4aa96d84785888c9b6d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:401bb0b1ee8088c60209f9024d20da81979a1df56a86cb681ee5d286ab94b30c +size 31674 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a54f16888d7afa1c7f2f72d70444bf31c69eae78 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:713a303e65e65041cc6d287d3d06f8c650b158e7fc80f65610b803200e432c20 +size 19793 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..851b702293af9ec25308b4d6250cab92fb9633e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768f9bef09c86ed7e7454839d008d583b4bdf24899c389fc96157c23d000f7c1 +size 20894 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3c34d31504488b08d5ae9cf6f11c685cf644775 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e960de29e59069d385ad7b1de7c5f874aa2fb600cb2827c2b525b951edf58e1 +size 13264 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c362ca6e054667f79db45353c5ecd9fa75106b71 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b34cb56a82d9708fda34b8e33b295acdb5e6940441d2dcbe2c445aa36feb0d9 +size 33366 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..562889a5bd1ee961bc8ee1daede10926d24bc986 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b137d7cefeb09fe0d09a4cff934c6cfb6ff5bae2ad812363495e36211701787 +size 42442 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6b4ce9665aaab1f6931ff89f28e8009066f3cdc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5590f902af61ad0bb0779ebb0d11504f5eb515ce528617fcdbfee9648054d857 +size 55546 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61d2f610a580e8d854daa869e272f1e684885d45 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccd7fdcfc53e11be6be23f556d536b7561631a8616fec955fbb8b77ddf91bacf +size 27237 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0dfdde5e1296143bca8861b6d022db3cc541a953 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31c0e8d8f2efaf2fef37c9a055f9fdf8131b6abeabaf8b150cdbf7aa14896b2 +size 14908 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..409d79ba2f798002fc29ce80190f871ee18a167f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f207b21ea404f629280cc9d058a407afda2d62167e1cfae37ed3013b82c0b1de +size 44975 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cc39b89d737d76f136b94bfdb7a71746e748243 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a85c9fbb32b2e115c2e61cb421ffccf622d12fccc3fabeda31d7c73f32956d2 +size 49613 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2206535bc71bf64201659be26c8252cbb5388bd5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa330bb67478745626565259a36c6e66572aa9a3938fa02bb8488d943d1e7ae9 +size 71297 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53aa15836939e4e6fad46f600a8530b150c986c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788eb12101a12a9001165156d099cf6dde469b0781527f2ca2e831f2a3cd00e8 +size 34395 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d45a2b8e7cea03f7338da36b80d98ca1c16cc28a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf104e06e232aa2408253688748d495d19c060ddebd50c3be64fbd1b131e06cd +size 39081 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c0b58d5611f27e6877fdc8eb07cc633400f71c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70683d2d4128b76e917b057c31282fba65a84d91affa410d958e5d8adcf6c890 +size 76499 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_036/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c880336061aca99ddc1e8f0f11c0ebcaf9bbcb74 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6b93cc925e4006a6079c728aacd9357430dca293ba230b6733321c8e45c32b +size 21003 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e32f385eba33e0fa67caac1fe02771a57c324e6c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04449f32c5cfb447415444a5ba57040701b8420c881e67e06f7a7790b764c2d9 +size 16755 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbb9258b28261d7a42ef263b773138ef7c595b3e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c1680aded87e1643b549a25f5d4f47e44e79444b9254fd2976bb572431a918 +size 74912 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf9d2f7c58a2c089de248140e38b183eaae26dae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67d81ea434a53ebcf2b136930a5b1e350af9ff84959cecc0d470f7dec10c07f +size 26657 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3350595defcc8725708523139a63209eb9753aa3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0144c2896cb4bc8b26f4ce0d0e04a834883f141720d3f9c970ed8493e413f4a7 +size 42823 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a64c646878e103e8a721aefa01e121bc4de39d3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55f86d5db0ee1a94c0c1a03fef4a5d656f7086a1c72ac326c5bd7c2d7f65338 +size 70282 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ed756543090008f90dec668887b50673825ec59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23bd2ae8417fa4c8f5e2a333d06af86742976aa0cd70763aeb244eb758a4e83d +size 61451 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a3e52683af92d733697d93b192c8cb2cdef9bc7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118b028aaac2c1534a59b2fca3985244d2f8e4ca66457b82ab463307de785a35 +size 55166 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43dceb46c222fd899ac60f6be6e05ba93fe8a739 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4887bae097ef7706f1cf5fc12910fb04c98730ab40b0de34be16e24ea4c598 +size 34407 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96664eac64888434e2786d2583ee3ab2efeb9bb8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43c966f9a799cae7cad3be1a65bb25096f130edf5882d4778a64e58e5d357f97 +size 43656 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49707b546f04a0d78a535ea6da8aa039946d00e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5082a51541201bd7e437c92639d1613ef5219cfe287eae04b7baafaecb8e5c3 +size 55839 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61f3270814b1bec2e43284cc95a39321a40e36dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaf60097a0bb115c0d4d784756a544a9b7b76d4a06cd3d4cd54f21046463f4ce +size 33143 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c4a9bd97f2859c479ed405b9c85878da8a8e03e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f33ec9453f5ef844737a986ea3658607ce20a9f2ae39d283371f809b3ac0aa +size 37091 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c85338fb71277bb27ef1e7ff9b20b0dab7faa28a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:258d84c5d1e87bfc1ccc4b42393a97ec3bd30c1b5468e54b72b640f2ad0d1cb5 +size 32938 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..833d8f7eb488431c6dbe041fc336994c3d88200f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2181a887c801c8dcb5aa5a4e3a758bb832c7e51d70b916ed6c66d6ca1d727934 +size 18351 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afe51e9c1862f5db4cf3d0af24025c97c8ec0168 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:909f176e96b43a3a62a26ff9b8dcba1ae127d4365f0cfe01f8a9cc07cc4d672a +size 20981 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87dfcebc5cc3a0e09ae56342b702fcf390e9be84 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2eeed17413c7e11324f98f9693efd9310438ff6ac2134e5fd08a0dfda99e189 +size 13201 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64ce2de07f66f2f5d56cd6003a9604c30fdee91c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9baaa995277c94b2c93828544853faee9c14b1ed22be0d7f41de2ad6c83d98e +size 35191 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3f123a8397f7a46aa059090641372db1415f6e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b6deed282337ed59c7c215baf62b83b02a2f9af051c5866288e15cb0dd3f3d0 +size 40314 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..199b9ac10ce064450fbbdf389a4dc9e395b22091 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe8dba46b7a33bf8ddfbd41bc194ca48cee3005bfc0f045485df08ad552b5e21 +size 54551 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7452e1d4e58d2506585a057cf45345af41ec6184 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26dd99a860397510158aac32c28c17317ed34726ed094d5de509db1b7d56c6e +size 26955 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a033cd6e9872654e609461c09aea7a6f1cb0bb2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12360b513d52d380cf1826bd5122b48a7eeebddcef2ca5eaa0f667403d17c035 +size 14546 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68928a2644a49609a1e8e8eb2786d7a76eb68e92 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c8077400c18c9b6ef6a52544d3f50ebf9c2c53c9fba9ff387261e8a68acb70 +size 44851 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cceb66c3bed59e3e9d763599545aadae67aff0f9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb77a85871780443cbe4941e64844cb42daca9d54bcf3ac21d20ad6f233b3f99 +size 49746 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c65ae7fa332e84e112f9d6fa34ebeaca7fc5ef54 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de273a4798eff34ac268bd121796a83190d7e5d8afea444cbfbed7f9d86fd80c +size 67036 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1874a50367bb865b9599598874d3aecb6b9efdce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b8e6c2db8074be3533bdd2e21733ed2d3da56310ca40f21487ef6c3658fe87 +size 34608 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05426cb2d5a2d6e620b8512d25257c1365849d69 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43438006d61b739b9943eafb253117079dd66845053808e156c1b3f9c985bce1 +size 39163 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df84d7aa7f8b8e3c939193d1b379a468e19f41c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbecf88d0d1180c734eed8512c282e701c19be206f9f645afe4c283e89a77fd3 +size 75035 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_039/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1a116e68c2315adbd525a407275379811cc4af2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6feeab4aa1fdbab452c58535fce57472ef0b5943aeeb0732b8e871fc28033685 +size 20969 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4da6d5beaac2c7fea7e9c39ba087ff4d69971794 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087019aa1ab4b2c34157deef9dbba5ce47acfec413076660c81873ceef1f4623 +size 15627 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd18033e2a7747f48158b8013fdc60c07cd5fdc9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b9a122913904fec6dc984ec4af68eeb4f9b37a6596bf688c192407ee5944983 +size 75011 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec28a6b49e57383d9e1cb3df1a3a78cefd0b3ef0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834807eb40259478b5731b70a44b8943152659f89dfab56911e2d5f2ebff7618 +size 26527 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5f27d90943da4f0590b94726b88d1c418deaa28 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3b76669dbbfdd834e046946f09ee5e934629ed03c14f0080bdb060d6a4fffe +size 43726 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..548ca6e782a4d9bd9d6c8f0f6d9dc79fc4e96615 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b6309b5f8fe6528a7e389dc576cd3d0349dbbd7e428e5b26ce540a77761b21e +size 73625 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2197d5b27a080b1c800323e25d7c716dc0e3356 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80b3663415a704e21338c212095c733a5a9aa020b072ae1bc16de4c5a1da64a3 +size 63425 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c11638bd96ad0c67b82c04047ac9741b5bf0dd92 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe8c0704df77f72508c6e967c57054ac22fbb5a9ef690418d26eae16bc407c5 +size 68673 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ede74296fd0b5968e16e9728df9e352863ef4b0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f8e3e0e68787a0917387070ec3a1d2a6d69c19c9b1e2f3c8021c9b3c2337ec +size 32686 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63a6c53caa42487c0b4ce6cd6274f838f706880f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56ccc32cb9cabcf1902f49495bc1d63f181b34cfc460a8f6a39106393bfec19a +size 44037 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b688c16622a5c8cbe7925b466686e8b83a324476 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fededea8ca965b9dd2a085704fbfd7841036e6112cd232faabc4ff2217259799 +size 60485 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6ca27d9407b8deb31522722eaa1bfc408ace697 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899edccddfe9ed9671be49ce5e3c74a0aa9c47c25de6ee52a0cb30e55ab1a44a +size 32551 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a25eab9b104acecf90450aba8fa6152ba2d67ed1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826ee9fd4ae04e557e2c8a6a7d2471da8e98781ca34d76c13031f8717c573521 +size 37054 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e34fbfcbdd221e7c6d7b8aa6e57af2df55b89d3a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc077ade7c5d1527193130c8508cfe3d011830539ed0fe23c221e34a833a2008 +size 37756 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1242ea2d90cca3d8e61e1c708ad609b3a66359f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890b3d8eb20c6fc0a6e118584430bffb35d3e94a42fcfe5e0349821df444439b +size 18908 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b16010ea3fa1883474cf89ee3796f7de79856fc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6b23b438e346766ef881819c2c99f9883a31041f322e88947814aa32759d536 +size 21020 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4381c5a58dd548eb34fcd8948389e99ba43e584 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2676eefe19ea05d5767c8fae4f3c1da4c2cefc7ede9553d8b5c7ff152f249bcd +size 13300 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..183c4a45319a6df04622614f79d4faa3a74dea4c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49603a58e0f019c0cb553752b5d21aff08422bbf8ed30f9a95f5a6517474ae4 +size 34144 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8741916fdf90c1e20e066bf689df1c977b2fefe4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8b5315ebd0e744d27a6d1d8c035c96e39d41223ce013c5ba7ea3d1fe1954ad +size 40903 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3226619b78a640e1b3fa489d27c49fe7c11c1bb8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19da78c656f52be39da6759cc95cafd46e546dafd63a57b135da637b8cd56999 +size 54195 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a38d36cec6d8ddd2de1ca73fc38e28fb57c9030 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6021957a471340fa1eba7254538f0f2d93d0db73cbe640a9a3979afc39233bd7 +size 27196 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ee8d10d2f4eb0320b051b3ea06dcd259bca984f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26fe3241222b6cc1deb2c06210ebc93374a23e2938487979f1d0b20c1ccb61c2 +size 14648 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eae0765f475afb83fa918f7268448f285430488d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851896f54af331634405df2088c8e0298babeb2226d4e1049227fc08c455f29b +size 45167 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..899335e8124ec63ad14a5f3734a13cfb7e9f6b8e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6339e9e79da65bd757a00267a57199b6899dbe7ed35a3c870ad9e502042636 +size 51037 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fb18de1ac80308dac8b0fd8937f076c5cad1ed0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a900f88006cbabc517739b44f2c96ffe705446f5d38dd5599fad84477d8073a +size 68162 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59501f76ccfc619f682fafebc2beab5bdb571f4a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e0a97cba4f37ff6b5034f567c3f524ad78c7e9aa3d2bd44be78fb0fff1e6ce +size 34588 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f77b2830c468cd51af287d2c69f7f046eee82023 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8082420269c0395fb1d2623361ba7a02a95b450c825568502ad987771e7593d2 +size 39009 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b536af288529c0fa52d5fff508548adb9ec43d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5951c9b2733d528c86a5be82da653fc44693b002e5452d256cf3950e21db73e4 +size 77986 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_042/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87b7e3c81b72ce8792a69488826bf81b2fadaff8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907e547f764165b37e68aefd0a63b8626215380df99faf7cd7187ea68337420b +size 20997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3008d8649984a55daed361acc6438f24994163cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67a9e7ca162dc234a477fd47e5ae0e5170d1e803d5d2a4f28b4cce38b62a3e41 +size 15439 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03c327d6a4d734eae4428fbf7139e5fdeba94f0a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b740c58c168bbe90d792940ccb6471d3e67711002da22f5f71b447870194ce8 +size 75791 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bd8baaa570f0be95c96888f9233b0ec98ba7854 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2c2393eb8511391b2e60ad564d6fea6838d0fe8ce447f9ad1e12419734d616 +size 26635 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55291929761f8b3226523e2a164f94094d6beb59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a11ba4ad12c072ac09af0836c0bb44dcf3e7df8388b534b75f0bad2317dd5c61 +size 43741 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eb56c53aa4f966cc740d73f7cd78e9925ee02e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a862bae3913639e77919c1bfc0261da41432c4da12ca6cf2016247c3e3e9a7ab +size 65225 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7d1f119a364c4f424ad91acf088fb7a2588b748 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:713ba6efdcbb993a4bba26e10284b9a7034e066d79ef7ae2e308388f988f38e2 +size 62719 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8238cc1b5dd3e123e89d0f27d0a1e0f0233fea77 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd40554bb58872c9dfcdf75bfeecda7bc7df7432ccb8167c235fba2837c307c1 +size 62156 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a339acf037c47ff99f0efb51db208a37b67a71b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3cf056e2055c5f832acfaa4f35b59eb6e781e76381858d01f8181afcf1caa62 +size 35457 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c965dd6e583195c163894418fbac45258ae6e5e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c89777b11f7a303199157532459ebfa7b8df76b837604b7f7acbedcbedf427 +size 42636 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22bc78f0126ae02e7af34369cb23759a7cf4b998 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef285d1a055e5512628d41561abe31c73d5f024b14486c91bbcc0a204d0fd0c +size 53305 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..259a90315db16fe0b75539653c41ea1efc2ca777 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63cf6958e3c949c2c0dafed7f96740dae3d3607d974e24b22fff69c58a535c8 +size 33208 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6c9cd70827f0f47733b24c0b025696499971782 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c8565fd39797913cc85df29e2f5cfc96d554dae77d14156e4b6eafec3a739e +size 37378 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7dde1ec65c1105e3da10c1460438908f304f99e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d6029eca46eaf473af09dcfe9340b9dd0d2869cec5a9de069caf6f9dafb68d4 +size 30531 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2ee18e0a81a9a5d0c4e02f69255e71626270576 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e138c3716eb6dbb98e07f1d7ff9c1dbf8565f7ff4edc3153186899c77e86233a +size 18673 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb947f7aef0b3e4145d8edd5fad9ae5e1c854a46 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f5862174735d1a22782666ec2aea11373cc15c9751b9f7c2b956c9d3cf3e53 +size 21070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b73588bdff363ee79ea335369907fd3e56ccd6d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dc92b5d9b349b5241a87438aecdee9ef5e2621fcae08bf865193b2ecb10242e +size 13408 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2010a71691804c71a60ea3c5c04fbffbeb6f110 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21fb00018b932dd7f3bddbd1ebb20504f6460274403c4b7a2b9aa3b10595889b +size 34258 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c3e37567b07d4560ad8d47cc58ca04c6992c022 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6259d5e9ed31f29c43306747eae5959479baf3a4e73f5e28f571f8cc5111624e +size 39942 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5a657c5425f7dbb88656146ac7b2937523ce00b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae2ed567fc00e44bc5c4fba1065488fc649852f8118a06ec1363a9495a106d3b +size 54383 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a85a1abc2891bcd8bbf2b15c8bf6606766010170 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d8e14dac34086ed3bc53c005f629d25eec2bba820df1733b449a783d4b61fd +size 26198 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eae35f505fb677a68c849c864ca69db2f38207a2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b56b0b8e4a69780b06c290bbfe1c2252b740d2a82b7bf77fd3f12ae8f2ff8c1 +size 14865 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..197cef2e2241633e01761e6af7dae4cb476d5f67 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae22f84661e360aaa28a2dd59b6c17a1e92b7eca343aaae760557808d5aeced +size 44767 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b08007b69f331112030536b7ca287aa9d0638065 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d47d37b30f31bfb36bf14ce42f239eab60c7b73ff68c5322ee0012002950f0 +size 51013 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..183044c7d9cf974e6bec6c15430ef4035ed6ef3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8bd7fa93f4131ce31b0db0e8e4f3d5bae1d6bf4e3765ec19302f54d41ea50c8 +size 67514 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64b7940d4120fb312658d613fb06574fed820aa7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0089b1658a20ce90a41918e1ddbaea4902f1cc1c601fc51ae6860f792f55ffe7 +size 34613 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44fbc794ab5f4f82cbcccd79f44c82c9a978d9c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed60410b677c050a81e33ab0639ea9ad7601315def85b4da1b9266036463d7a +size 39212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfddf1f868a4fb4635166ac744e79eb045dfe1cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411b369d23b07f1f30b4b636ab94ebcdbf7fbadfb9a6b5b0802c18f13be2c0ea +size 76087 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_045/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c7843f3d047028d26df4c2a8904eebbaa4b39de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a56602649c59a7f13389dedb3a0186a40a0f4c2cdba88e5907ddd2d001ff99 +size 21011 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec591ea91279f4baa47bf9ae37d18ec21bca42d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:332c4278db01a0745f2f9f889ed95bcf64adc256bd67650072e0817294b9d68e +size 16625 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc5a33211e37fe246d2422db1366bcb42f2cabd6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:563263138c1f98bcf963a9620e1b5d4eeaacb6ad5d6b1dc29f4cb887989a2974 +size 75054 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c56403ca7b9b3fc220cb71685e445c2b4c5fbee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c1470c64b45a70b5eecca39aa5bbdc4ec869d3a9c5d870cd25436dbd24ff1e +size 26708 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57b2d74c3f5be4fb2dde7a458c5093ce124c37fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70264542e5253a3cbc6cabe4556734fb2c6d95f237544053a8e68d8445ba852d +size 40869 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5aae6bd5cc7c90a81399be0f82e2e346dac963a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edf43407ee07af071bca279a148f588ba5e2e01e76315598986bfb1c5112195 +size 64832 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8147077bd15e8af13bad9d5a8688534c301be17 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c1251f1394497af585c553e210e3371918b8da04492d34d11a47eb0e2d7f7f +size 62844 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1202041edc320703e11f59b9ea5fe39de389a6d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:838b5fe4db9313e3147715c16f20049794fe6ab176fa822a08a85e3135560a17 +size 63988 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8230f83adda66391437fe994a2107f0992ec5a85 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd43ce0b629b474fb298b180a5511ccbacad8e234da18870ffd37bcc6034a385 +size 34828 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f071f46789d3936a3bc51dd3de0e1beb85c6cc32 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ce9270318b1a77445562579b1732d5a15d48508f731a18bfcf262cfd248f8b +size 45719 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..305b09e9707a93926327650447975ec1b4aeeae6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5855bb3d5a3a1f5f6009cf3f2b8bc774be7b99fa3a721a8b31c53858716779 +size 60250 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9db52a9b1bd810716191e1c9558929ea46a391c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a086f22f913ebbb76f70dd300a5eb07778309a1edace8890139ef3387ce37bc +size 33619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2dbcfc1aaa6ded856b495269a18ae520fbedbb9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3036cc1d76a752b7662219ab910538fcc451f3b9e899a9d08ad8cb6501906b01 +size 36919 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be89f72207c471c5a19bdfaf08a1632c9fec2bac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dbd300ee947a51dbc96daeca82ab78c55b1082530e33507f88cc94cbca4cdf4 +size 32155 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b764b23516281af69b83e8c3c71eb5436e57b0e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bec5928b11d9f3cbea40c574d84b2418608fa0714ff74711e0f4f27abe88a67 +size 18448 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8351d77b9059974cdb55a44e56d637bc8054dfc3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5bd464774d76c9ab794fe83c5161f624e56b31752d668dfc406f6a7519515b +size 21024 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ee80e539b083ef9fbaf05af374fb83c868628cb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c342647363c677a11775d52e176db790e60d8362597888be89c06bd0fa1775e4 +size 13404 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42a05c22a3f9d94559b71d4030589ca11f11e1ae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:219ddfe4e4e593be303677707940c167839f72ff5bc40ba8bedd9cd34ecb267f +size 33897 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38b52f52b848451287c13e1330fa6fe5f38296e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32ae0175692c2bdb559f523961c55a7d72efdf95ecd77d12282469db9af10f75 +size 40101 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5d610cef0982b4ab77e76fa866f8570ce88d992 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0028f66ed8003c23cc1af436314b0f16283ab7897147acbd2af1a4d0a3b964e5 +size 54653 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1765148fd15d821f3008faa65169e6e133b46b11 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa19fb6c13932b999da9f8f2a6d024cf7b63169d87e79aa998d4b3ebe3ba5f7 +size 25620 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6febe4f72e66c5d2d835770fc9659c930bd0b750 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85831b333ff9254d2a8537049aef0c8fcbea12777b1006c890eabea0da52c836 +size 14746 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b0dabef1c17308cd142c700608f1af9b983e288 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee17f778b38a9f7d092ef1f12fec7443dd55fc09290e29417b107f6350e0a368 +size 45391 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2847e21e7a0b5bdb61bc9100192266591d5ea312 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5746c91ef1f39003a023e2ad137421a399371dad9c2aa5717d53213f19524cdf +size 49561 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae429fb78e3810dee5badb6fca1e6d26dbbb7492 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:351b9e334f285451233a76a3f11d78572b8737c47982afa328a00750df521543 +size 66268 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07107cd0f0dcb5461bd6068bda65ca965ae611bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e74f8dabb7b0e350b065150042147a27bbfa6097f3a654ba60366073673db0f +size 34497 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10eed74f75d8f10eb8d61e63f4e5cc756472c05a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c97a6d165dc69213250d6f4dab9c52b75baea17b6a4b9d240ac6057fcfd8210 +size 38986 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c638ba1f18af23d8676dea3ee9e2932ec6d16eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:127adee5f502e1cda7639f1a35ae3ddada27d05b6e86e6e05c213501ba770089 +size 74420 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_048/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f6300244a88680b84b810c136f6fa08051adc08 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699658a63a5561fa1d930d2e491584a00eed9b29964bf87cd5729225bd5aebfe +size 20980 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54b7a56161d9c989487bfcfd096c1e01f002f7cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0abb23e2e7038faa5b7760958b19c5c6296e4c679743ddacfa61e37c1d9530a1 +size 17372 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..372c4491e8f2ea479f46e2fb94fe279a4907092f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4c4553539122f798c761acfc5370414b801a6afbc08936c1e91bc827d9cd05 +size 74443 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f3677094febe30b37675f9cc125ffecd5a3eab8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c0b19c2ce66677f0fea7578cbf9d91d8302d679dcb5a033648f208860c5accb +size 26657 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9bf0465bfc07c203494e32b68a853e2aa835f33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032ed71d19cb54050fa917fcf5b566066b704ef4c735402bd7a8bb3f3a903bc6 +size 43468 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b6b1a6bcd910fe6d873188b4d6bdc6e62d2f0ae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6afdbc0aefd51e156abded1aa8fefdd21d6115d137712835e504c548f6966b2f +size 69515 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d64defbcf472efd7a4ef1b4c683b6a06eee3f2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf56205762b8d1f69fd5966ec6360950709b596f4c68d391db1ef02858e3cb1b +size 62261 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65a1484bbc52a13798d464cc95763787f2af9ca8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b308772f54c5e9713a488cc66f29cbc65fc34546387e18b69214a1e94cc29d59 +size 65799 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8cf2d0532c3797aa312f638c5338661adaa7cc9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc12fa7239e403cfb23cee90c3bc5b57d36fddd0742aeb6f632caf93c07e05a5 +size 34430 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..863f9fe250e7bcf2048dce191aef293db785eb75 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e80bf5d6528efae90ced5e0f1d9e32cf61e5bf9cc70329078e67bd703fdbdf +size 44273 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89e91812583c46b08f31905d317332642e2a2a1e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc25487d891f8675c73316e97f954c55175e6cc538c397b94dd8b3ab8c0ba12 +size 56196 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4a1de7a29d55b1fcf16f47b8dc1889886263bae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139681e0dff37be0b587b8a443a3530aa9dbc4821b4040f3e0773e1224985775 +size 33163 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10b412b8096d82197edb2d932d86dc1b9699f12a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f68f7b42a5f33ff722f301b59da9fbb166d77706001182e29b757ead1e07df +size 36257 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a7b10eeab2d81457f1003492733b1d820afea5d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c9775b4545ac8d42c416a9940e513cbe5ec6f831d55204a299b3100b4ceb801 +size 33417 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da31d847eb065c8f45c3b46de427320c40752c2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cabb1c3c59b1a86c3dc720aae676ec6842ad30882aa6a318e21b31901baed3f2 +size 18489 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23e87e36612ac57732061bcd8f365d8f0ce30698 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddbc6256f35bf9cbafd9a63493cb3c1b8a5ef772a2fa7051e1ecaa3dfc42a7c5 +size 20985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f454d8882330061d16d22aba580487fd907d3236 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ae7424e19700c89ee499e6cd9a92bb7f97b6a4eb50483fc5ddae9970d069a92 +size 13544 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..744be494c70f7ecebbaaf66a9685db84aa307e46 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e2b7c3ae881632ef7949dbce62787438421eaf3080d5d2d30b7d100fb762b1 +size 34716 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d76b15dbffd0305cc766b3fd4b2b65dc21a66881 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524b33ee03efcb01167395d312b156de3b33b5571d3541b62610ffdd20e78e49 +size 39403 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3cb69ccef4fba09393f7a494887c52bdae39b74 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c6dfcf7fbaa57ae1922537bfe9efc8a7221122e0169eec8e5c5a88238fe60f0 +size 54664 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f5184f76a78ebba1dcdf4a559f4a4a88490dd83 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b73176153cef9c30e39ecaeb6ecdc356e0eeaa90ec1b57e8ed45afc5237cfd5 +size 27140 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..240e1d21de387ebb3f7693834e665e826b1b7cb4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9608332ad8b675f05a3f0dd436b338baa5c76edec64fedcce3bec9a391d462 +size 14886 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d767c3a00261efdc6f0df208d976c88cbe862ca3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6eabd5efbf55248b1546346b7c6990e485174cebd416808bdde1b65252877b +size 46123 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b047a719a4e71ba1130cb68fd1433f74abb58b00 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ab7c51e13ac502c62e0e66cdf4eca1f6658e0031faca186d5629bf5a392e91 +size 49616 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea704ff5214b9d63aad689b39dde842f8aa8139d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7674a87c7b845c224290413617d61c755fe533861fc3e56446a1d232028411dc +size 67512 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..651584dc4f14499d5cb4114d72c8be966189f6c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b63af3294d523841186f138d1aef32d14d506c4744dd8c4506b1932347e5fec +size 34652 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81df3c22db39ccfb35b0f6cd4fdf770a9948f1d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0951d2f81c41f77177463c01bcbd624a6ddcdfe3e703ef5889352e449570ef33 +size 38907 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6093d6e9c1e162ebc3c70f54cc4ebfcf165bc865 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3353358639f989c279fe021f8cf9ae7c1258617d2f8ba66e564322d5d5e6c638 +size 74844 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_051/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb370c8c909fa83291c2de7e1259a451bed1d4cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2822c5a4126a60f96d981f44b24dcf48e17d294bf495c68913255aa47ff039 +size 20996 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1af3a951cba9e803b5b87d9aabf007efb2ca111f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ec2040df1279994f917e31256a60b1181f91160120f10757c840cd4e998dbe7 +size 16223 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5b65e5a86d1861ee6141d3a7b878827bdddfac8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad03b9d2f6c2ef91df6e81147edf6767b54ce3da32f7299d2d8dc0a66164f6b1 +size 75699 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fe5f108654c95d1b8d9239a36e67b0285dadb2a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a3217c51832823b7142f46ca0a90ce36be541f4e3f0e4379f87f70a0c02ace +size 26420 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..898e56d1f3ec7505f31beb8644430d6dd1287884 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91bb245df2eb1a0b1cf9e3c66522541740af715aea5bf5f3d080593dee84ec43 +size 44350 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63f8af5c3024acaf90bc179441c7dc310cf384d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96a2dccfb18a5735a67aeb7580263e4582c8fc7f07ea40e74d2cbd99250afeb +size 57308 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e94e523192bf43effa3cc4b190547102cbac1263 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fbf0742f81712d4a0ae12c6a08f4fb695e28d98118c5a0bed92fca672c6bbfe +size 62945 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2e297c4670b57bb902928aff5d367b8c5177a70 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a6c2ec23fad012341ab57d2ac9b4bbe3711eaddc0e80b29c1e0ef9a48fadc0 +size 75477 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c7be9984e2f6f3b9516c0155ee4bc34c9e22820 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eda2c8061cae118f7a14bd90cd320b13c3ecb0479597d4629a5d8e0cc3295b1e +size 35567 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2b58818991c374de4b29252f309c1973011b690 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c91201d4aeff450020085728f926dd0496f4d7abe4fc8321410ee430224b9c39 +size 42851 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..779196b6d496cf34b2f9170dc086c5d835a8bb2f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1867f91708e23c95d52b806504a5488a63f5bb0c16da38048045503295a96fbd +size 50902 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff91da4d5bec3d589c49f2fd7c00e812774e0d08 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6bf47dc735be2d7904c7b727f66d584969ac8f1ffa03bddfead68a7bcd2b77f +size 33165 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4449a58324061f176f52379c04bfdb290ad57f06 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62fbe0afbceab9796dabb628569399d57c9446398ce865e777b27b632b2109ca +size 36301 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0984c92131c1953e0c9312d19e46aadc765004a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd3e58f6551910a47ae0f28b50f056b17d06f0c31ba4b9af64f5cb31ee797ea +size 31469 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b21b25b30fa6340bc12e2865283e56bc70432515 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2736c317e2efb6eb1dbc3e97d6d959b82b04e3ca99b99b81d251d19ad2329c2 +size 19676 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7ceb3cfe881cf1fbde8cc76d2971b7f5d602216 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f453bba8ebcfc7182dbf7a87166cb2cd49d1477b688be33ae06136d49a9407c +size 20865 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dceec239143089d89fd53499b1c5ef9aa7d34b37 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d39f6b98c42c534f2fb9c10de4b2c25c6b6ba0ad572353ea0fbf45816f44c01 +size 12752 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d62da10f89ff5ebc4548781c6daca333a9cb012b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f730fa0b52bd5a17abe5120802a4932956aee113971b969cbef2ac9f0829134d +size 34089 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a7d9de47adaf77de7e176b1c3d4d6578b051ea6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58cbcfe7c3cb89851440088b6895ac9186b7a8a3703d712e6aaf0d03e3e3e78f +size 40632 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..accf24f7bb46eef0f900e3a427cb407fd56e4b66 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d3f723d3e694a692ec6448a3348f22c63276ab77a13d1ffd8b4bcc0f66db81c +size 55609 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c795585d1b5c9897d253aaa0ef3512a95e82f800 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0e1a735f51e83ccd58006ce842213331d6c1e339bf09e87824de1892b91d74c +size 27133 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b5f08f66d4e71e392899cdc4afcf53fe903be67 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc04d1d006d869b7f3e9d79fb46f45b1bea83911416fb0073cf71c2b5b982963 +size 14594 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5ae95463472a4b3bc3d8f6fe56d3e8e55b20535 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37adceca8fd9f29541b186fde50ebabfaf4037cd9acee4de08f13f9a56ec3dac +size 45403 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdc3f583502f1212eca751dfe3860067f5ba4419 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3cb2289a85a154a9174e0c4d274df571d11fde8528984cda5fc4cb102f4fbd5 +size 51467 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..502769e2101f97b0cda83dc3a7a77acb759fb449 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490b5e5ec8982c11a24f80e044cde3adadd6ff1cdbd8700faec7ee48faa016ca +size 70147 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a1deb978a2bf06a6a01ddaeeeffdbbe5fcf77a7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6be58b60fe57e8a65bfccbceb855df9c29027f78dde988738ca85ffeb6d1a3 +size 34863 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb7604f46b0ba75c13a809c282de6b388ececd58 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8f10f11c2119195f102d7dd029a77dc2cc0050e62fe4b1e82e695408fc1c55 +size 38985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8d66b7986f59fc03ebf296680fe7676ea053e63 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0c4a19fdcb41940c2cf2578c6495c53c192c3f2afe55ef3af0421b7aac76da +size 77848 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_054/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab0ea32f70aaf825c68e08988c3c1a795ffd92e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8767990c5742b9f88bea9355f9feb4e3cd36aa13dad6b72b279f005a39b4a49 +size 21014 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32bafff407ea0244e08ed15edc4ddbdb692af515 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f179bc051bcec5009244debb10d29d304f12084b0b43587ef6a23c485ee133c +size 16210 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..266a653a1368e65e93f35eeeac724950bd5325ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba12169c791ff3c858348d46dc3eb5e0882a35681c273455a54f7a305e9cac3 +size 76273 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce3f8574289b543f2fc273d55b414ff3efbe6ef3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2fb55fe67190a2f84e2a8b6743d7193e8c850dbffeb614b112e76871aa042d4 +size 26584 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f5a5dc3ccb5ca891a718317d094c1f42739e61b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40033e2f8aebfabbc00de9fda69bf6a9ae71f0b3a20f9888b4f17db6516953f +size 42115 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..181fe5468b11de5d7be6f0cd6b571cbc7fc501e7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c30addcb71e03a8bd14053001d28143c7d9031f337e6aed8eaa34a2ae3aa266d +size 62575 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1d75c903689aa487c752ccceca18154b508c379 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff9dc358cff8a7e147ebc0fb08bc5951ac12480d1b5e3425757626ea90748d3 +size 62307 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40698e665926efb46deec97fca94b993ca14e225 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c726bb1c84ab3cfaf765a76ca77bba808ea5b2e22bd260e12774366d6c8f16c +size 69316 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fafd44dd4fd0218d0ad26a78accd1c8c0a9a8aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf578ae09270cbbd269ef8dce16a07bea5847629104b75d4270b3446abf808f +size 35936 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c16690e6cf9e363e0c2114657aa2da2eba674b7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a24e5fd84e9930c9e8fac83ee9432b86acc53984135a10ca1caa4dd04c69b17b +size 50590 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c7690e36ddb251ae0a63180afe3b321f8901cf8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46e0f22ca98e3dc7bfb1665ec833745a7a297a50369f5866f568c9cca904794 +size 70855 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad9db5927f587a660a8fdd927599d2d435a9d220 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d95a1c2fb04f3ac4de2bbcba7621b715f2deb75807b4747d1998cfbfc7a6889 +size 33524 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d79babd2ee6ffc69a2e52b659483bc5704717b72 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2709d2adbcfcf67758a7a765663ccff82d1241333529632961f38fecfc699668 +size 36062 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b04f04400239c4c346dc7b1ba086c01239c2787d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967f9fde645ba9e8220d272f79891c6bd478d8b19b75f662ad667ad84f84e396 +size 29417 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be638cf9285426a6cd9bae416cb56a9dee57843b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378a8384860c589ff3e04ac6d5c2992b7ed036c7c6fc8bc4a7b31e46e7b23444 +size 18650 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d72d48edc0fb8bd4c4574b736e2eb5f25dffe2e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb0fd869e153204baa0a8e6837af9e4f6901066e4082018106264497fa01b038 +size 20916 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97f93ce134a74c0c4ed0da77260a346d20fc9745 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd0037ef0b524e7eee3302b334f106ec67e0c03a60c20fae14631a325b0222c +size 13204 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f173ce46fea62820ce11c6690db72f2d808dc858 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a81acf0af3aa695282424e40548cc841bff2a51004a769394a2b42a963ba6bf +size 34542 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..884b831d4bab99576b5ab9ac073fc0a47cee3ba7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db038b93d6fc2607f5677c00f241cc7e34632ced88b1ba4f5d412451c5602b65 +size 40559 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22e66293151e5e06064fd849cff60b2c21726ed3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e94d3476892ba61ac15b8f14d9c98bdaa21b2fd8f6da1edb04ddf8de1e260d33 +size 55383 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ec12bf1ca8fbe6a84c36ce16ffbdba2b1a0eee1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f30a3f7fc0dd8e148c535cecea3be5e32ca00cd1f1aa5ee55722044b96f4749 +size 26766 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..add30fa3639c330dbde36466e0397ee937f20a78 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68586956ef7c08abd7846937b1e1ace860ace1df7f6cdfb07ea3b397af920d3e +size 14810 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..828b921dff6b3de4913c218329aa157b121ae2cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925caf71ab208655ceae1d3c6377efd27db9b1534f85177e9bce3a18545c91a7 +size 45220 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95678ca0cd4579946d470881fd8174c14a7c204d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b98a0626364c37e75bb4fb76cba180bd3e65bb6447382e59634be9bf73a7e8a +size 52814 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d4b9123848698345f612a97c9272a3bd0864c4b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b7e90766cb973f8e2bd7f1db369e93f54871ae926ae4ac690e6efcacaf3fe12 +size 70574 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b23ddb23aa2d9e1b4380e733385d885ca7788157 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c1e9f47b81114e210168553835cb643eb220eb66b3f83bc54414377ed071719 +size 34799 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bc2a74ef5bc07b6646c53ba659ba1e72b6be006 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9ef16e93637e54caa74953b81e24b0e352e2ae99c8d11b1c4c54ef7c1db056 +size 39218 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bffc6edbe947b5d736eed7a64c536c8de67f5942 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac5c34da8b36196f6388c87910f5567ce6990aa032e422a3f45983077e93c4b +size 76983 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_057/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25ba05b7cba6ff528517a468f38b7ef126bd2131 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb190596ec0557d0b970d930074aedbd8c4af1403825de474795c89e65d08240 +size 20982 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9f7b443b592ee9ec23341c3743d35c565f4d680 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ca893a19849759a07466c4338bd8ed958170f7edaa0f3208061225e5a41c11 +size 15613 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7080d4e88dbf25276a52f8dfe88a522f57a39754 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907c957b893f723a71bfa752eef1e8bf75754fbef208785aa939740b236d47f8 +size 76990 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4383a0b9b8d7199812a83b722f8404856392bb8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da1871ced845064a740a4c1a534bf7787a7c5ec633042157ceee118a36820b5 +size 26501 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98c51ec7d1ad0ea408b0b53cf34f8db27629f396 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668905c9945d9168a7d314e675bc2d99c224ab6ce6eeec2ee785dfc41267b368 +size 44500 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b96831ee2ed916e2314b241e7f24494728d337b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f03d75ca7c6a899f2fbeed53aaab770fcd8f25445da3e8de53374894dce462 +size 57425 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d1f9b1b9f17cf84198a62aa4acd8874ca5a1de5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80b908c3c50878dae7b1158b317654155d862b4eda8a9226f0505c5971a22ea9 +size 63320 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5652a54aaffa09d4b45ec3a287ac192aa16cd9d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527f1e9356b268831391a9a446164e606cd682231f77154a73ff91bf2381dc72 +size 69276 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d0535a2ec4d347c7ac44f913ef42afe6bfbe9bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f37008ff5a060ed0a2211a1035f47347982f708f034b7ff61071ae3f22196f +size 37228 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eecf975a754712ffd21440d22f4fac606ec0d5b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e63bb54f506fc54d908d0d3c738f747361b6b10cbd44994aee569f91bc1390 +size 49311 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a35edfc5e6d221310837bd071d64ad8f778ba86 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed2927b598158a76d8e386cbf7ed2248ab5e294f3bf400331a25e5ee28450df2 +size 62111 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d71d242b5ae046449e94d439e332ef1fe94860b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399d5e975a563353b8d2ffe4b4e63cc4b0cb1be6c2bad19d252989a9cba37c8c +size 33241 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2533cf4d5298e8294b04bae5c755d4c6b6b6236 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ed5a0d82df2663ecb08790e2a931590881f5d399d1ea8bf3d36ed710ab67ca +size 36780 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57ed28afe8bfc1813c8cf041aba24836759d294a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae9e88e36584294424b6dcee668babd252a32bd9867dd4d466f7101af3f10712 +size 26734 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c195be806537332f421e80211f80deedf01835e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9ca8e68d3e1e969e0b76eb1c134e243bcf4b6366897b88f45113839224d14f +size 18336 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a8ff8cfe782362cd7c4745c4f17c54e88f709c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e4ef4e79f360bc5d9b61f86825b8b1bafac0858072548ef1be046d71f090e7 +size 20914 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd10871e455a355268b83b1396f383e7f13941cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67fb8865588adc427f092d2e213881f1205e2b8537802002a8fddea9f9e2d6a5 +size 13042 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..235b5b8d8767af1aab408d8017cde0517bc10e06 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea77775d5a46f85e013c21e159102ae5e85fd22b0773304a25314157fa8b0791 +size 34547 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71ec98bff9abfbf4dba205e6d70d64d432162af0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31418ea5490d6e7064f71ac86810dcb86e0b2cc268fef76ad504fcc1b0c5711 +size 40175 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dadd6412b818d381bd09bded1b17b8cdde29726 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3b59d96a56ae4cd74da6ab7c71c3827ab36d8ddfdf5a2eb4464a50122734a1 +size 55344 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24f3a9e357c4683d31547e5f8cd81e3658a16fee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4febeb995aee6d3427dfa844122f74c968d92a29a34736313a7ca9540c1a105 +size 26285 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcb4307735b1f1bec0de2fbed507b6a0c7e1d1ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba3c826e358c3aa22a098fae20fc397e11807ebfbf36222cf9aaef8b734ed31 +size 14705 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f023681de02c8f1137f1a212a4acfaf5e1e76929 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7fbcb97795574b7f99d574c8c91b27927b7c5b65cfd98fc1b94f20f56a81698 +size 45184 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b206523c0c8426f25b079870fe04b0e19c85661 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e970c54285f57b5ed3ff0758e7454e319fd85d5e99001cf7a6033e450ab41ae +size 51377 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6152971b66c8de35c70a9c9f3a74bf3faf8826c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090ecd36f3fd5380d4fe1dabf9e6d930d94e77816e58a43ea762c0ae6ac149e3 +size 69182 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..471f7d75d10f67652723b976f66d31eaba03d6b7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011ae58bce52d3d658f243832f8650eb734d84a756d890dae63764319ca4e75b +size 34718 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b79d72ee26e9b85db13ed8cac641f83483ec0171 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37106b45b1483c2f6ffed92917b228aa43c22f1b7925474e60213fbd7fac5338 +size 39251 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a6c42a8d6cb820935bf96e56d39543e107770d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a349d41c247d3295fcd425aa824f688444d600ddda8ae7d347c0313c916f15 +size 74250 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_060/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a36e08ccc465bd6c0da7aad2c297f66b3b72728 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930bb854be08794e5b0e45e6014ae507d275d8d7218eeb23d36130a5e0635df4 +size 20987 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7c43faa56dfe28845434d3c9d2570d9d1ed33fc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1225912b6d3447331ca40c26127e4eae9f34b5d2b7601dd03286e055636eef30 +size 16269 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73aa72377f44f3dd9aa310e28c9e362fd2ab55ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53deeaf4e065bf4a141954a52a3deffa69d208e373738d650da898ef0145bf7c +size 74351 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cf9e7c6092e4db59cc33f979dbbea4b8ec8f1f4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15cec05b1e3afab83bb7a1d74d751195463f322c282a67554dd3c2e3dc66717c +size 27098 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e41ebe1ddb01e74f5acfc226007c4cb4d2494f2f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ac8e4f3a6029f13a2e6911321fccc5c171f229e562bf3aaa715aa7a24630d7 +size 43824 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58ddb7eb8fa4d6c5cf31497d6244a1d28eeeee4c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f87116bf27a0e9ee0be2750006e5155a952bc1b3531ed8d45135ae3ac99d64a +size 62750 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b06b3e24364c37decc204819036230914074ac3e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31756738471c9148143f2866fb6a0d069b82fd5387f15cb700482765eca3d754 +size 63224 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e2ae347fe1aa4c1298215033bdece87398d3bf1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394323b95919dbbb7b6b516145befed980ed5e06ecec5e085f65c365b70c1884 +size 63738 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba58e748c306612c42aaa4e5fc35ea96d4e1b5a2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f480331e808b154c6b2f7a225daaadba2e0f61b201d016359e76c800711e88 +size 35323 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb30e4d8dd62d003b8e13a4bd396b2655e7b7552 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6474fb56f37a1595b4bd628543d3941922b4a1c58319a0ae71c036e4a1d743 +size 50622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f575d8b41b8cebd79fe046fa644528213e23ffd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02052ace44382f74ed14f4cc1983d1fdd784b2506ba206059c39a0712ea44877 +size 76082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e206eb87b71c6ce06f501da42a4a418c7b2397ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9286c658404311060e48960dab4e6fe9b9966d08ebc747fd957edbb9ec04e641 +size 33337 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80695f9e752598264662eb97604e8e1e3578c9b7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293bd490c9ed2cd8709c8d645f3d449cbe5f418abb8291c6a55433c7978c3306 +size 36161 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbedae69a0f77ae4f5cab3942c8b7beb96ee7ace --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c3433a36497c3487675ed42b8ba3b493b2588bd1cee6199eb7f1b875b65f2b +size 31464 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..506870eccffa098a8077d278598e273b2cbee07b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:598cd6562b644193f82e476e97cdd0b3104acbbce30032e70cdaa28419b8e75c +size 18619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..585162eb50a3dcf66d8957ae2e6aff392f4627cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ec1e9065a5f462c3f0eb0d97526471adf6ac3f782a61d0658874892d48e5e00 +size 20939 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e084c66e3cebc63d52bba400eecd9b96f75eca52 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b142e340489268f2182d229439d64e78721cf21fe583884c3df139ae02cd111 +size 13125 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05a92ce667329986345e54f23dd7278ed9bb0489 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e09d3a17227ae6e1ea1653ea1b351b26ec34f76dc9fb7a3d0da456ad503bca +size 35551 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed440242c70443bc0b986370f752b587945e21d3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283d923cb60ee9ca0e0fb509bf60533e7bbb2a8c46a9b92dd4c33ded697606b9 +size 40322 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df12ff85cf6b1fe1168cbb0c04f8dc7a6bbc2986 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f337e8c1fbae292d8e6643a8255f58dd1d5fd4060706dff48ea5d66444303271 +size 55429 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea801c7c59406499add39460a93730dd9de3e912 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08f899b50274b0325e1199373d9fad64e8ddeb4b2f27f0c6f36c55718c4d8ffd +size 26343 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e461ea20b6ccc8283c1bcefd02c0d3438363477 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edde7bf32959476ebc8ae928206ecca85ee13835719c32cb6c61f4e602660710 +size 14838 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..489377fd43a1907c4703077653518a9cc29fdc87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39da0a9168363a373dac00e69e8295044e351febf29be9cf5758fc15538efc37 +size 45816 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f02675917fc335ee89bad7796a46670f2616d9b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11339c0707ff79fbba86df3303dd8656d50b2fc118f9531d2503c52b86871393 +size 49866 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4bd074adddcab1a1c7d03beecdfd013f3a75e13 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d1d47df0198ab1786a79847c3a624132dfcf0be6b9e1d7a18c7d9e25571787c +size 64883 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6abbb8718a1534a4c3cb192ce664b679670e61fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d2b7e71bf53bd7cb747f398cb5a4b3049d0f11880508cec3c837eb9fcc18928 +size 34731 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa9d32d2c4e3e4f2036059414ead1be49bd22c86 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:390becc0015d454e7c87ee7a3500b9437d4965502b4e875596f208c6187e56ad +size 39217 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2131655576a83496b46ba5dbfac0d6b6c0a6e1e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e8634904474662967d8bf4e261df9a82097d343237ec601c4bb3f81db3408f +size 74582 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_063/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..387d4b7606e3128e6eefd4273983072e5713657b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:818392131a4f8e6ae4e94b4c4dd2c023e7869ba9e26c209a8a834c423e54f979 +size 20938 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..807577cb77ce6ba28e177b574b6b6ca3950df2f6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c128ed8c5df1d8164b002f2f56ae78987a85c7565d989177069366d6d5b7edbf +size 17129 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbdca0f22b81a05d37f5644ad372cc4ce42cad98 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d4965b0c8d321e26669ffefe32bf9ba09afa5962bf061cd0cf2431a50b7608 +size 74068 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62b16c103619f60dae0441ca41610ba1fd02d5cb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff85532f3527bf3726cce216675aaa6451cb137c9192919de56e3bcb41f3b49 +size 26559 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04b98a75dd12908fd949815616dbd547237123ea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aecc9e5c8581f1607778a4655d26437cd147d2b614c72c62a773e123ebd366d5 +size 44651 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c2e0cf4df7a1a3b8f0b777afbc4415f238630bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc59c2035484c9b0e5fc86f206674f0dc977a2db8ee61007376486441b93a4b +size 58277 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3dd52c7e79b3fc2e2dc4bf212e09dd6ea886a22d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e60c58ba11a22137d69bf71cbeb71e0ca41e4eb394dd98030b6c0d0e381728 +size 62306 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48dbeb8f1f622624b96ba5e2879922bc2ed7adc1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37c07e5d6ae4c0cf970130eb8b01d074b4460168acb4b4294e67401fcc591ac7 +size 69700 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..672ffcbe6a6539a0c70418caba0696a9e4d66cbb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b77912acf7869b0236b251e8e2e5244ff36b543f1d8dd5656bf8e40e9b0c536 +size 35257 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51080352d99490cb24b00358f00ad4a7631cd787 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d917cebd2fa6b2828fc5e1c8c3094034029c29a22e678eae3b4906b7350263b +size 48366 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca2bb5354c7fa7992500d3ba97012c234a146fa7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9613f0b36af25cf62fd835cd57633073e7f31ac786da1c97c6f871e19d2d425 +size 59194 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49d7a98fbac56430719ac3d2c5ebbed4d08682eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c26f0ab210efc537dadc726aa4d525a843119920e7e6e8fcb81368f1436d1b +size 32870 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d60c8ede479574d243432a094c4b3e6b8aae47b8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a4e57acdbe8ff50a7fdb31301af62b215808e3f369324becf1e222816eeb034 +size 36073 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32e662c9919376a70d547848c7aa1ace274aa48d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdf00f7d03800a43036f0d89d291ee3486d00eafb843bf2414a13d261f922819 +size 29247 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16acde5e64c8d51704f95b44889a19754b03d4d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f3cda2f77362d73dc6af921c6debe493e9dff7f1ba0e4859bdf55406e9b18c +size 19120 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16a9a64726e495e8d697c7e78495e1f8a8ba84cb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b096fe832b608a859388b8d38dc29a0cbc511ba79015fc91033217e0de499d1 +size 20802 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..522a12ce861e6c2d36178d4c6c019a356136dcb4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892a6b579e92465292c4ac03fbc29d874e3fbddb98667c37e2694374655f3e86 +size 13239 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ff3ac72574c3dec8df25d77e95cb9aebcb9eab9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5476b7a8b9034f226d80175016c3c0c6ed7ae3f120f8b588f3eabdc3b1e20746 +size 34725 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd56f77001eea1456305c53605e0e1b8c576dc88 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1366e9ddb73b928129292fffff2696ef2fa548f75cdbaee26cd66c66cddbaeb1 +size 40643 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca7662331894e645fee82c517c4208b7d0307b33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d55a1cca4ccc8ac3903457e22f632470cb1ae446d1d6570b209881d4473847 +size 55499 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2463a8e2a0e517373435c11d6085421c906f51eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a450bba3a02380d5f77cb6353722826a7f175bf279c63cff96ea7413f5e81b +size 27170 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..235c4707b40da7b419eb2d7b6c4f785e461d9988 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3d7c1931caa05197bdc28ccc42478ec9761c9913d88661fa59caba82dbf99e +size 14934 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e04204bc8d3361d9349b616ced2980ac3ce3799e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a09dc83b0645457e224bd81edc0ccd767c9000c0e16d0a5a55dfc2b27de557 +size 45277 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58e889876dcdba1fa97ba67f3220b7a4c97905f0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8975e76ed0dac7979a8c1731ec4ddcce492978d6b1b80714e50c5441064dbad8 +size 51788 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa0154b167ac95b4a6c70b094d702c13ee741d78 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc317c1794fe716048937c145f4baab87c070624ad78cbc7c3df4c5335ed2993 +size 67017 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6371e3c7e74773e6f77dd3fde26743647178318c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527f6b5403c2f9ee4ad1484cf95fc7c85e2d4b696b7981975841e74304ac2bfc +size 34619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..660d2785bc8ed2c300eeab6be745b57adf8ccdad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e990682350c707c32317d8f9a0cc7179075ea63ba15c049d6203a58a3075b695 +size 39002 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2be43b77fcdaf366ca45e75cc497557440a32e7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03cbba0c154a12e6a34cecc2022343cc3a173d29f137ffd4fe9db1012b94ac71 +size 75760 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_066/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ceb741308ce9c1be1ba5f1c626ae24a0920a9b0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:715f94e611cf65e605d0eff3a41ed19943a41164ecf09eb68a2381f3381dfa51 +size 20990 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a6b0820f9b05bf34971582cc379f9692f5f33db --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a923aadcc25404f1e046e54e82b10ad68a0b272e7fe9b809d8afda804a11b6 +size 16974 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4b6f8956f3f695259f2dffca025b074f47a7f7d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e9fdcd734975a3cd1bbe4c14fd67cf0b96980974c58a9093e662aa4aead68f +size 74909 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36c37ee547a0e88003ca1dc606c9ce9ecdb9cc31 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b716ceedb9d0b74f6fc466f2036afec32302ddf6a5e5b2aae4b09ff3011dc84 +size 26962 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eafcfe2033f40022f17d5af95f8fe995b19cf309 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8c089255ad1c1ae4057611b3cc1ed36c75244bb2296c69c5d21e880f4756af3 +size 40979 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c5a1fb6a2f2ee671a5105bdb9e796eddf11233d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95d827fb89fd3c1fd15b6238c29b3e77b6baeea87ba7785425ed18eb54cf7aa3 +size 62821 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f35bcf793a17606feba6ed24aec8b87f6f38d3f6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed0242031b6c932d3b218bd3bbdb67dc96a8fa3a14a463fd68f50c447d50cf2 +size 62468 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6211aed06fb4529813aedad59dc146928d7a88eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ef9099e9236685903b9f64c78b58484bb28ca121f36dd7a6df061c082cac3e0 +size 69081 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d28893a7e08d991b1de8ebaa101cca01a1f171d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d0ea300365f32d6a6c8b86a80e24a508e84120bd3466c6b161fc93c5aaa6d35 +size 35386 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18078181e84d17388fc48afad1327148dd65d6ba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de5ef7af34d2c8bb03aecfd440eb48e3dcaa8245cabafb6010a53db606aa3b5 +size 43673 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be0c20e401b58e8c45d3e02e43c8d40d6ffb1229 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e317b124f491257c063424dfc4faa4125299e4ec89ca26872685ba76f935952 +size 54299 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1af2454af012fa050b78a2a070cdd1c1ff5d7d7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b180083d33111223cdaffc34d762f13a40635b6cdebb15d7d419d85d619a34de +size 33383 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b3d46b51b194d03abf9e09f33bf92da2d600ff6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3123dec7c88c8cd733d0e4e017b655c709dad243e214cdcba28663846352cd3a +size 35531 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c2bbb3b2e54747110b0f629a3f481527591e2d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f91fed1ba9e94bcf3949a463fd7a472f6d3aafcfc31707e750ef7340cae26504 +size 28516 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4d14ab591fc76a0bd26ab0890ff967288915870 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2a868d9a6a44b24dc15a2f4b393bdd24b1a7922f18f4bb4de3887c6fb33648 +size 19273 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42d018c3a5325ed25928c53e448b0df69b54c4b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21607582ede9a1fdfd7bbdc0298a8e1075992f99796dc86de5d3a414e78ca940 +size 20945 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c895935e34c13d4f825022e31ba198df3ee3892a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fa2e506b64bc7690867087281ec4340b0cbafcbdc18972ff281a40bee96f4f8 +size 13257 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b402ee3a08146ead47044097756c51a193cdb87b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:164eeb516cd4185c39d9055659a60bd73b0151ba6083339284aa826150e4a3a7 +size 36036 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15e5444f6929d8ddcca926bd80ad4aaf5ab7d9af --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2e1095d2dfde821271c9469e29c4d253abbb8bcda25c563937c58e3d31a0dc +size 39615 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9b8748ded344c96d3aa17c2c6d861a3a5cb3d6f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4558e40776f93167aa9ecb7f005fe163d16508873554ea69b24c5923edcbffe +size 55041 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..722e2c82b0a35183631b06b568cef6b6e0b35648 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8732ee930d81433eed8c1b553f5ec0d6a98dec3de004dfd221c440be05150f1f +size 26518 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2daa2ea4dd6a8890bc447097437a420d0cbb1e89 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7275395e15a57bf699eb7941be70f6e4c1f6700957fd14bdc7adb08fc9825920 +size 14824 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..658d1e1aebc82cd759f2dfebffd9540dbad09c90 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b01041eb75fd22395cc27bdba124459a81aa0ecc6da349abff0a5624b8fbf42f +size 47430 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27715c284320824be0bb7a26cbe36fde5c746ec1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32cfd6bb4e187d682510e1d83cacca4f37f14aae2a9bc1f5c4f702b515cbec68 +size 50920 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e8624a63202158b5190b1c1b937bbb46baa519a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bc13188c4efe9140a3b3584489e6a1756138614502c8273588760c950b9ab4 +size 67819 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66b8c40b6ab51d05fa2d9951a9668ac0b5cbd24f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ef4dfbf1efb5fc5b733417138aa58e5f68b016f50d41994c5d314b17d57d51 +size 34652 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02ada28a401a5c381d067f2ad43629c73861235c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc103bc0834cf33c9b3c26268610fa92e12e1ff16b7bff92a57fc39d9002911 +size 38705 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afee26112d221656be6da01ec9b30587ce95b759 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d5b47a98c8e14ed1f7d5df50ffc7af8bd2fd6c96a81c07b674807047671a1c +size 76183 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_069/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9d0ee097fc7cd6fec6b3ad8af4762d8d104070b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9254f9d14950259be588f35399ea3fc03a300274fa0c931ace27aa2d4cd1669e +size 21004 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..890d896d367dc4f7a6281f320376098fc2c3d7ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26be91373976bc28704292c6270a7b781b9e872dadd7a184d441107af3afbe3b +size 16520 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64c812522e55b9167730ad8d993bcd353d5a3664 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ae572c85ae60d6c5b984affefeaf309f5b1d6025919ff08afac9ee538f1017 +size 75432 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aeab18755791817f11d29de60de85dba77a24593 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0378cc50554bb834e353b00f01aa1cea850700deef85c6e17003b68a6f52b3e9 +size 26862 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d78ddbd7196da4f448427a4593e0385478a538e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947fa16a15531187124ba91fe12ba9b6971e3a32282c96a753e6fbc113728bd6 +size 44746 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfed494d36b9f817e715c08f6865cfe84eb790b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028c14de672524b04d3b429dbbd4183c7b9b2bfb3727551eb7d6b0e51605c3f2 +size 47963 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..763d3d2ca29aae607e32f41a1ee59dbfd4adcbc7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711354c1ca055b284ac27885ca2871a61543d8afe3a3d48391047dad4023b0ec +size 62176 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2da84840c5de5692ef71a1b711bf783de9a916fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48216be0b3930003dac7ec3988f57a229a71c3ca002dd7c8c7d77852f8020df +size 73866 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48ae8a4d1a6c178fb6aded28cb2ee7656c93abbb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3b752083f443a7066c5777a125c43eaac829025cddc9a14cb39b7bfd02838a +size 36029 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea89fde2c16a0d93451a3844f040639f3862f962 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc830cf01b574bc60e40db6a1b47153d0306ab5041290bf6935d22c277f430f +size 46790 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..717b3213fee6ec013c483d435d779f064b0eb943 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a178e482af166a8df9d1be6f5a578daa034d1556f48d751d573f757b797a6080 +size 61903 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..083adec4a187be6818fefe758b774ecc6727f8ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80ff31e7773021255bb08d5ccbe51b67785ff3b485f02a32eea1adde7ece172 +size 32805 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e251ba4b9258fb3f1048a068ab46032e76e4a23 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f682e3e65783e8af72ba8ca64b9a26ada471302cd04a6f44ce6122b66f22799 +size 35255 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4685330186a83693d4508054a23a1e29cdb60b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cd7b755e1a6e1a41787a2a33f0de9d96c6c8f46f54f07b6f6d494c82f208901 +size 32222 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a30a48ba5f0813a150660ecfad23a7a9784a012e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec901fde26356db25dc94e302377305642161c71ca8af70cf8cf56164c97afb6 +size 18877 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9fe6da060fc81b5f7ed1dbe39dfda922da29144 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ac6376bd4567b91a5ae1d5d9b75cf2834cade3925c4480a0348fa5d13264ce +size 20892 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db3d97b1547af8f3ed5f16146974a50d9842d7e4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2945a2dc5cd6ddb210c4e97d4ceb70ace7b2b2b8d59b6536fa490044a3763d3b +size 12873 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ddc3e14cccad4772352813abb00d86a16efe038 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b683bf565cb129a4a5a19c22479f3cf1af7d6028512c7b26f4c006801af304e +size 34621 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8aa1e2cd516c5aefcabb6c252613b5fe7635e3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50104ebde1fe7a60b090157a545f9f0a79952a0b24abe8dc10f4d241b7aaea8d +size 40964 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d138fe1bf27c65b5627e222f87834ad7b90bf3a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d15281caa85e00b2b46ccfd404cf3595947cb3d41b395e863a4f2c0b74fe520 +size 55908 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..301f9dbe425fac065ea86075bd62b20131576534 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2528e7641dd0b9e243990b42c567361ff70d49c53c25c3b8f6b3a06b74b8be72 +size 27282 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d361fe482e9a424158d571e07868d37216caa249 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b3a01a842b3610b89f943dce5715c6bbe1f4a03ed8cd90dae6b4b89c760c04 +size 14719 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ff2a7c07bafeba4f6ce22571157c183bbad6c64 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726e33f7d70fb266c921dd238c9a975dc4c6cb52f7fb3bfb083abc02ae167631 +size 45478 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48b27e893d019cc6189be6f7032331a98ef7baa9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0b170c1904c0c000f10d53c2ee1796224a37f603afc20fea2b24c54efbda98 +size 51117 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..744e0e3732adb82a8fd2c426ccf7b7727b27d836 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b9c5044cebefcce532b9aacb30ce781a4a5642bdc0c5c733bfe3ad8f19edfe +size 67607 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a5fc7c1d8dd58b3c89334be81f06c7e4fb75cc0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58286a55e64550bbaf43b8ea926feffa904125757875cc654dff8c48b1d2f6a8 +size 34648 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e222330ae628dda9e8b33ba2248ca8d2508d0bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f550a810ce35f7f670466c5bc6c8e967801f3ab6ca15bbf6da457da2645851be +size 39133 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0552a3ba678b05b4877901739785535a9a1c7de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1605fc745f01fb1714b79a4f73c32e9704fc3ca903f3ee2ba60dcf63c81ac32f +size 74275 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_072/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d37200dcd120ba0dbe3755f517fe0adb28d232b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d27cfbd30ddd9a82e814551d4f6bcf9d6c86cc5e5ecd3b26182cfde5466a553 +size 20970 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1910212e3efd964e5f597e9afbc7f62bcc29e53f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c22d0e4c3909445a99ab7c3e57653cf43044b7d28f30043f2fde6a5e74e6d6 +size 16849 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55e5617a719a265b3f9fcccc4d5e4e364f16d9d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bba8f8ef93673de7ec2e047278f13ae2e4717c5ff0c32f400a07e1ef218534c +size 74300 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d6bda4b4ebaebf3a3829f14f02f3b6ebeeae868 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06d1523725ed285649f26634f6e545749d962cf759c66a8c6ad533bef5b860d6 +size 26771 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d6b739917f1d1233b96ed7de9d35bd26cbf6412 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8bf1d3429ff582e53dd4982fa0d19f1ad5b132d5fd5170682a21516dddc862f +size 44137 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acd92081e0276af5e4706bdf22b892f2ca2745aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b135e7d65fc0ec065f36a2df9d392a247461d35dea97d41d075987d93ba50e +size 50522 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a8330b19109cd6127bb997fc764f09b597a9e4a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e20729c5d7a8b4768c17380f7736982040d0dfcec57038b644bcb0666bc4a51 +size 63710 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..148e125b28e0e6e5110333591874fd443bca2dc2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eda3bfd67d6744fce122d34a0efb7b299fc41989063b242b1a1b797669e8f92d +size 69331 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cbd78c0239878998dd7047461fc760d597983f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ce62099c33d83941601cf1848af285a0f77003fcb2051762b1b48e1b98d8fd +size 36594 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b99de1682795af15aa62b6787730a7755afffccc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5685c16c7749344e741434d9c034ad89a3fc2ba0ddc3b017684ac3ff3c92b19 +size 47418 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b9f4557798b73b76b4c727f7c3d21cd14c52a4b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77180757b2578ee952f15e29ee693a651129ecf8ec06b33e60ae3cf2f2308072 +size 64868 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..579ff96693accb42f1fc8223820bf48139e1fa1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36d623188c5e25fdf03eb59818fec092ca1fe1b3d7952ff9418cf6c60cf3673 +size 33170 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b3e03c0e5d7cb553dda639f14753bca040bdf9b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ad0cc574ca8fc80a20a8185e0a03e3bbddf42de346d0eece9f238305e30276 +size 36018 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80313c9b75c61bd6373ad04b1ee298c80282b5e0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f285fe58500c380c45385fe5c2dbcc66cdfb57adeeeb4a8ac980180843c62c7a +size 30354 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cfe90e99d5461a6c29c54e9ba7e033815e46099 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539e2342cde0b6b60a4efcee9d1a8f6f772278793d7205f83b592e66c18fbc6a +size 19524 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2ba89aba9ea820a50094b7404fe34b1107b8f79 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ff6ee1d83e5d46869fe8e7954cf4dad9c98d515e242d64b799aaf539cd0a20 +size 20890 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9265d881b5fd3e78b148db2eaf3349e05e581bcf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f7fbe5e79d36e9da3b8ea204b340092970a273d35f8727eb9588d90426b19c8 +size 12752 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ff3d7a4fe80844810b65467ac62eebda1700ae2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae81b5fec09dba19a45d90a88b558cefa2ff03c4716a42fd3183993ea544dfc +size 35360 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..717ec3d24acf83a33460ca563bf8e2efa605f1b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c04cd19d4d7651aca02bdb3b733c8902cf720d3080391d8b43253220b0ec1df +size 40093 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b08684e6ef8c8b904e0c2f036e4e4316e9c4fce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baa7b1c176c58e857a11bde9da17fd7bbcfc8b6ab53d49d4efcb9a928721c8bf +size 54859 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1b254cdfa616372eac3d61dc78897f2f511b6f0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d1b65bf104979116a7781ec76ee277a4950284f30a9ae493b0995ee606c6f4 +size 26478 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f0f80e58c8ee21b865963f23c79972e95fcb276 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8430ef2ffd12b48c24f6ee7bcabc9360d4927f9f6b434410ac9ebc3b1067cbb9 +size 14813 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f54f7c8193ea26043de97db4f451a953564aeb3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51034672cea537ce323a88df950110d738c4bfa8b8ccfc4ba4a6cb0838e75e55 +size 45508 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e553753747406c67c7bf51c1e95dae7a3cbe777 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7540c34eb27822f52acc31a1c11da929a4f03382466663dbb537bf63eed4e96e +size 52032 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6c798427b31ae19e321d6d452bb4668e210c157 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107e4705903e021a771ea42c11d485ffdf278a54eab978168e83eff8e334011d +size 67943 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..daf24d3616e6b2f1f9396c8a75043df6c9cf6896 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a075818f9e37c1844afc18426e4f6efe296c839080a7fc681d69a4e93867f582 +size 34635 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e53f25b62abcb22484c2db80387cef9a25b856b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f77f14ad47b41020a23d30f6af6aa9ba3a7c9fedabf14893eeef804b1ea732 +size 39159 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ced1fbeb40b4aa6d6a36b64160a09f197323e89 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ecd16cc7fb7a93b05111f324c357e63c0134b5d36936bbc230ea2b54c811a61 +size 75256 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_075/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e20bc94cbc22c93cdbb7f794c1795b3ea53c05cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb78384ddbbe7f3cf056deb1b0c389f0f5dff1e6efbb7c7b3473a09cdb0208a +size 20993 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9b7bb585f14f588177247581c118262d71ab8b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d64cebf7e77e0f74acff130899989b241650c0c34494862680d3151e5bb70d22 +size 17375 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..273fa7464c4ca0b08c847076a9000f2582a6bd19 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a92ee1a2d263e5f377de5aee21444213839e00144f7d62a3320fc9ee861385 +size 75003 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5024389da5757cac948c7c23c3d7039ba0ef5fdc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b15263694a607f79db455a0d3a9b5b96a1779415ac09b378c76b009ab3f18983 +size 26619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5b92abfbce7e73d3f4717f2b44d2b1bc945a8c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:274de680a2a7d15387607759257c3544680b85a577cf325a34d94832e0fafe5e +size 44694 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d913e28e0df70b142cee5f3480bd1869a039014 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7729f38c0a89ff8938772a6423379327d0f812cbbc6fe502b1f6f39cb84dfde +size 61370 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ded3b7ef8097c78b733b789c206394b652925c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a033c3df070a1bdff43f2b2c5f940d4caea5d7c09abb5f4bc0555cfa37bf9e9 +size 63682 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33d5796a6bcb5bfa63cfde6bc961ed9ba90c42ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5413ae303d2277954159411e376d36081c5e107b9dea410d8a3c95b87372e59 +size 70212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4679328882986e821d4294145bd45d0df6d5955 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed9e651e21d08d80b654087d79606c52899993cb053aafe57f567f42e558e26 +size 36712 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4d4c84946db2e7d2c69a4e126a0418645654633 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cf314ce3bea50f00f32ddf414b0bd075eeb975d9d17a15d178ab3b7ca72c8fa +size 57298 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c3eaa267147360e1a40a24203ee01dd7cef0aeb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5802e163710a89303840f8a315366f7cec1212218ed2880787b6eade65773ee +size 82349 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f91e07cabce1c80aca2038f547139d57d2ce24b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ca0f0837d23453dd60e654f4c017776e7aba786394aa1b89e7bbe93aeb9e8b +size 33352 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..030f72398007725faa82103a5223ae9a7d45553e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5594fa69e7ce54e6e0ce3a80dd716b7be8cc026e8df21be9edda5093e67cfa61 +size 36712 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d2b44877c62a80fbd9477af2638d24fb1679963 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b5c979fb9e90eae29efe38316130f9842a39aa795382bbccf0fc77e9e6c916 +size 31635 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e8d219fe46e7747d704975669293d13c834b762 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db06a53aa0bb187048332bb68484db068c46bcb9a97f1f50f79cb59fa91b3a1 +size 18901 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc8c3b3c968b660cf1befcb4fcd44f02bd3d1da4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:983277ee95b0d8bffe16372dfb20f0e0eb1ca0f25b4157d7d4b8596ba2a66438 +size 20915 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e67ea77e7516e7b9cb241d4b4607edd7766dd21 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc1a97b26765aa4c469215f0978c9cfea75dde8aec77172662f67c1e0c62861 +size 13421 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a98a591c1959b740b36ce4e6b11e36640971c466 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da22e34017241c8a29de1b2c5228e54ea261171787e12edfef8a117818cc758 +size 34559 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d8ebe3e6694cc67cc34ec1e490be33a85bff887 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d80692d7bee270a0d0330dd1d4df8242fedb289f135b6ec29d340ad9ac1ec7 +size 40696 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1dab889c401ddb2954e0b984a461f9c889cc5817 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b12a4c2f43fe731c0a7def38ea1836d8ba18470b081197bf39f0777af0698d0f +size 56263 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e9cf2954ac0bcf015c1e7c62321c44c9757e1e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1748c9877be1d2a7d48adfbf710b23e51144b7368946fd8cd12a4f817793dc36 +size 26949 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..132d8262d4452d4f36f3112862b19b1cb774d1d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ad48e686c2b8a42964d5f557c29dbb2a2544d33b5e7c1e0f52edffefa662b6b +size 14761 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6973c75694a9c89a947fced223628e35a90139cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89801258e66116d2653aefb2e797dd3f71c2da170120c4708d66a67e7945f4e8 +size 46284 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e3dd596579f92d473def36db4ef48527d6c6eb6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab296fdcba2174175bcb467a85e04ceceed03c39bf1803432af51e3828fa9af +size 52014 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2957fbe182b3b0beaa1bdbc99601c4bb5d938a8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5385cd26c0b0142ab79d3c71c12829e1b78d92137d4c999c168fbcb9a812ce5 +size 69185 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d467b8c6e4ab9aef52fa50665a56dc2f8d6e670 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff5dd9be52889a56cf74d4a01c62076c7792ab69373efb0a5e91fdd5d235baee +size 34686 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe889d28172362c8783253391d88f6ba67cfda0b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd0809a944614c4c0bac4acf28d60a6a54e088b7e26ad56e9363a3c387080d03 +size 38892 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11d1b3824df6b07427bc7d0ad24697808139cc08 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f491adf80b7188b81b01281b48d39386c59d0d374ba1b62547076b096da5e16d +size 73788 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_078/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..295d0592c7e5adfe6bbd4de6acf2ee84c4c36aa4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53cbaae164ab16a05219cf0d6c9c1daa70ce1d7c852ec83380709ccde48a21e4 +size 20970 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..744100f95958b0cbc333c3cad73803f69f434046 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243d412700f718ceb21e9628b2842db2e671a7e312a6a6b0239ad95252d17b29 +size 17344 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34e60ab558ab18f5eb10ea73a5a0b6db735c11a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3aea5d78d668026d5bad16b09fda1822fc228b9639e70a1ba62e4b063127ff +size 73959 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4818bc9427e55efefb4b61c67310e3fc066d0a19 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f35625b4970fa87dd5447a125c9c42dbd39cfa1da16b712b4a4210ce2f1020 +size 26809 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b38c94fe6eeca96872f0c65252e131c49cba490a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944563bd4ac857a5ead4caf76ca39d1434197feca7e959fa98d14dfd27a61c2b +size 44660 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8ba48944e935130951cc8e98fd60e77849754d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc8f455d52a36099b3dcd5928d3c07e1f799b7a38f9e71854d91bb5fb916bbe +size 50704 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fec9ba805639530430c56080b4a391863a0a391 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ee149af2bccc3ebb8e6ae3cbbffb1029d84e61658b4da7be63f8370348487af +size 64036 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a91c20b873e830409369fb1e2c09985725ccee59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad90d4e6a2d788ca8b487d98239517629f8b29ec40269a87c2c297e16d20c13d +size 73320 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d728c97da9ff101a8191bc854bc41a5e2a5f5b2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312e9f0ff6e29cc6b93d63b5d3be83ac6122e161285df75b2dfc54542aa8d38e +size 37234 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82db1dede786296ca32ea938291eab728fa53e09 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc5290f478b26df06983add48341d780de6640667a5ad032fbda7092016290f +size 47901 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5c522674a93f4a05d406ab26e7fd0c6522b9cdc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18f59bbe750502e85ed2a9f3140bff61ab670aa6c6155f0b55f6d1a03e3f740f +size 64226 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1833e26dd329aea230714e75712a894f2a3302a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e8c1d652c5688c084c4134b6d8cd55d480f183e1cb02d87bbd1b9a64c23eed +size 32530 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b94937746dd74262bb03f7f45044da85fcfb9cc7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e56e93c74a69ecd9f0c25b06c6048bf00a42ef261ba6e5f7c1c406d09994c14 +size 35442 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8aa5dbed5ee65407e9e0dcb079cec630ae3e68ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4f1a6f19794097508eb807f785c842ab5c4319e7ab16e4d0e2d28030c59ebc8 +size 32151 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64b19250a54fd4373c35a32da305bd19054e03ac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5f62d5cf97823fcd4888d24ba1de18fdc47b5d0ddec5b99589a400a7b7170b +size 19337 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65945474795c3a869c1abb2bed26406e5175c980 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2afa2f97031958ec874e2639c5b0c29668aa058da18d8434604c63775d3c50ca +size 20880 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..950fe1bc90a92a880a40cecba6ae3f59cc420530 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6201c7106b453e08f19a14517fd811ed9fac90596134d3dcf704521812846950 +size 12846 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e410d36aa7cb83514475778acf63a89f7037df4b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b917910c4d0a345650323418a05bc3ae09724c3549c2bf9fb55bc6784435f369 +size 34772 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdcd015b71fca220d13daa598b3d1ed86ef1c361 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17cb108db47e85a461fa07afae121da3afc20d2df01e20e9b1a8ea5d1219f335 +size 40615 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46571f811f134ff1b6cf2676b5ae8c40ad2a4be0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde7bef9f1621bbe3e100c658c172ec43cf9029b841620c3aa82758ffc19b466 +size 56532 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c8d46c934eb0e10ba19dae948a7824e8088a654 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:815f498880ccca145587a6215de5696020d0d9cf573748c603cac346659701c1 +size 26943 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca48256327b3dda559a5e049cec44c5eceab545e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f33919ccdb5a4fb4328c9018e4753b410463e6373f3f95c9b27a55d23e1a4508 +size 14717 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2cbb407d27ef8db7841330bf51166506f6ed6e7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7257326b71a4c3d5bd44ea75f89b297aae5495fa8e908efb83d32ce7d563cc66 +size 45756 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c0e8e94deec0aa41c72e1f4e035e210415ad8b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c6f2788a13b5f85159d6d5f95e70c5e84ab20ec01a9e34358361af85df4537 +size 52680 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2261c3986a274df6124a6f8d7e2732d0aec5bfda --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acabb11ebe7aa642d1704cc2b453daaa724518c10b19d988736604247db4e107 +size 69433 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b53a380d7f9b7500db7dc3f8f55acc0132a4dc33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aecc973973eca2d396cee15b1c4b8a24185026a532fb754de92791ba75c9c9c +size 34674 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3fc2a2b63cd3b60a9f98c05d57a2da5e067f399 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f894eda63adcb9d057d12caaeb5507238cfb3b9c42c3811a3ebbdd3ba76efeb2 +size 38853 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a109dd9e41a3776ecc2441ea334b4992298e331 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a371de68d1d5118fb7a1629dc8b9305d69c0b61486c9c40b8be86a3e62154f1 +size 75608 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_081/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..808bf42e7c0cc435abffba758e7684c244807f56 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807a0288bceab2a03f4c9ce5c95a8a28f34c4ec34a7ad8be0f695088fe226981 +size 20984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15851dae8d17eab97cf0bdcb844c125b455f24e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73cb04f4f3ab0ea160ab6dfd207c7469442e9ab5a472ea4e3848d8e497e18601 +size 17374 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..318f0fe5dea8e2d5a1524be1fabd3d4296872cfb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8863b362b2bc7e865be8c7905019acb4977a0d4694c96c1fcf5a7fc6d8a2127 +size 75165 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e06dc96d9eabdb8064e84ac64b12383dfed073d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47a07fd234b40f94bd8c22c883ffaf0e98cbad76d0e95f50aa586c8b89d17e9 +size 26345 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8ae5a69c4b5e2500db1f763acac53f94189a0d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c488f608fc10727e7dbeeb1126f85e6a8d1f234dd4a5d0a3bf6e45afb8259998 +size 44272 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aea92c9537b32f58c24a2899d361c1c0ca04fffb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6314a157ad3d9ed22a322b6a18236f25e00a1ab05d70c5bd609a3c2359a4bf9e +size 64195 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..177c305d154d3afa0a8c76d877cf2f883a8c00d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7711eafd2055037ccb2f9b5c9b3afaa992177940566eced364c94b885725acb +size 64325 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84c00a6d9724b1477a44e8fcf8f21cf844cf5bf3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c322cf77a337f80098a394e1eee539bbcdc62cd1915026321de58ddb71b46e13 +size 76991 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35c34da95d20e56ef826fde4c25f2cd1b3c992d1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:598ccbe5e9228af7767dc67fb0acc80a8152e4d9d69e139913c3e4fd5ff1e57a +size 35959 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c77814754a35cb007a022a3ab64aebc43080312f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a59b95a61c9a1bf63734285379370472ba270993e0dd130cf50709b6bc7a1d +size 49601 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..807a0e4457e7667c387e8e595ef8c6e7abad58e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:278dd0812946314c39ede35708d5d2ddf134ec9806d2aa051a28b0e73546518c +size 71692 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3500f6c3e25e231494381e3fa38cd28fe7763f9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734abc2a8ab67de2a7adca3d4be601db3d59c9a8e77bbdb5c7f23002064503b7 +size 33140 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65d1c3649df778aa23d58c62c320205e934a4a85 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a113ee1c10661214206b703459688a540a49204ae29cfe6a38f4c38057fcdf +size 36456 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..551c41363dea1567204f20275246f2cfda1e393d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392bfe2083757b4515bcc5cd736ba7ec126f88d941b37229e281a8e3b38c4368 +size 34283 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6dcbc9c85b2c49c69aab158e18a5cefaf647c62f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:691dc9e48581d205d559faaa42531cdc8df5cffbd68634a0c2b1fde5b37d6505 +size 19273 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f02a51879c19dc4cbd55f737e0bdb2dab61d2e40 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1034dcb41c7b27e93fe69b96678c957f018908d1c0d5da7d64f353a87dcc2c +size 20888 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..064f21d24ffa506abee6bfb294d3dca3f83859a6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3535c023793f2b2800ca2bd371ef95e23c042e822c21d1a5585ed3a5aadef76a +size 13519 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13e1eb5f91d3cf8bd024cd4f99b24b6fde0d124f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1238c049e565135fb1497247521192861e904ead2dca3c1979f93f3c61fbac2 +size 34282 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d21794680a226d16c7ea80a9d50fe65a97d104f8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1f08fbefbee8dfbcdddf2cf79b426d14e9355d8c70311d7489261d5892b0b56 +size 41057 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e345ff8fbf56787ef3b38478452050636e4399e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b24c16d8b9fbf564c849a6de1ea00dbdbd3bb95e2a5c8a73a7df30295e5940a6 +size 57309 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee0c0602cb65edd23dbc7575202b43e17ac20295 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755b841c5104bc094d313ee598d7dfb5c145155c974dae94046be02ac50b71be +size 26994 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a8f9558bbd0367ceb506dbd5824f74a4c16b916 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff821c809c7dd39d6ebf127538c34f97f71d5e37c6b08e3d307025e303a3cba +size 14769 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ceb9ad979d2b95ef150ad0b930a496526dc740d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b58164094df1e150ac7b4b1792808771d9abdb6fd590867ea6bd7e2f16169e6 +size 45636 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afa4b8d132857b12b15302bf036ad4a202b05157 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc86559deeb5ca436e32cf5ff6df0eacfb6ee8436b9781e81961686d75b58637 +size 50520 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fdc0eb937bd827ae4f3baf84495a8d8faecfc94 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40d862d7437cc1675fd80b2581056200be9a89124447c847c2bc70b189234fa0 +size 67077 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b97836ccc1fe810f72a2eaf512ab6e0ddf23518 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb13496e1fdbcd961f3f8e8f91e625824b6c6a88324971aef10a9796d514c59e +size 34421 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6e0f89646ef2c881a3ce5c411fe9a8a8e81d4a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4e05b3a97fe99e7305bd1add0c16421976763533df29d9f098834192ea802a +size 38346 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a293d17e3af6343f5f95410f03b933f2f2182807 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce836c2aeec73156a3b62b34a80937b1cf76f1a9c540295940e98ec961c87ad +size 80007 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_084/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67974b8275ee037c9fa87166ce48b4c967a4215d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ef6ad65bb46a2a87dd59a467d464dafa0bde8382f118f948197d5de554420ae +size 20991 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7081ad5f3c54be00b865a0bea95ce9c4d08b5f87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:464b35462fd5b19026571cd67314af4edd7028b48a2850b3ad50645af71c8c48 +size 17920 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8b32a86d57c7d74c8c54f9f98671051431c6db2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a492941381e9fdf98fe276ff1feb76d30efedef0691e931217f4eb17d250fb8 +size 75047 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee6dcc612f7862350cf2654e401a7c56fe993157 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7363e793fd9b6c2570d75da9d1de5ec27e489d3ee735e998510a4abca2440017 +size 26345 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..055cf3175ab4afcf1f78582ce9739b592c04f902 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea19dfcc8ed959ecdbb0bd0f98fd7b5e888fe4a9619fb6286304e88152eff843 +size 40428 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ed679ed3e370c73b16b0c2ba9d714ccff96062e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fed9a99a7516c85b007b76223f01ea1b94a98f03c74c8278180c626306ab510 +size 57913 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae9437362c1adc2bc304d434ece57fc2fd82197f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bfdbe6665011c16206e4841d7dc3696ed3559d1e5bf91a5150154c62cb6bc3d +size 64719 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f716c0da987a02319d96625e2c0dda1c71ffd3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b42beb2cf7a0a9b5b9cb8497f50aa96f5b55e748a2694f098b588313ea5fb2 +size 72947 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..898cb8d2106ce401845a8b0b856fad9483d4d80c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4bb6154426f4fb68c7dba9ae521297dd6c21a984530ecf7ab69bed84a93c434 +size 36225 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ceaefd9a65d268ee9fca8f0b90c23e1bf17ff8f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fbf149dc2f27f2c27f2c55d99dfa10dcac22050ccc92693f8c20238f84ae966 +size 49805 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eba9cef84ad4e790be91762133a0bd46cc1bdf4d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:336fb32075fbd55c636dfada954bbbb43529ac703690ec5d61187f3d728f90fe +size 73683 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b01396afc379c754d432d46a07a8ecbdc0b65858 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120158f7d389d970a658496a38cdb6b120b2c5f779d7662d165cdaed81486779 +size 33136 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70164ce8b012be6b51c2bc79d46d3f6bf1c1381b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc765dba334ec26c28610bdf0b9bdcfe98f737002e900f77111a6ce1dd56f3a6 +size 36432 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58f27f8de0bbe615dda0accd0421bf24f1e09071 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a84c57df34e173f4d3113a6ab263c7e4d69039863fc18e626e73000c700213d +size 30461 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fb3c8b2eb7f44115897cfc40a65fcc77f361d3a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a429867b43c29b6ab08abc13374b0525c56df07da6c9abe5e5d305116f81fa +size 18764 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..169cbed8d7512e34195144413795d63ff1bd8ac7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:185681815f0707bc3910a87df2e2c150a01107bbcaa9472ffa4cdb409bb60c25 +size 20898 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31845af6e39e9c1c6ec17e8503991ff4cf209aa7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af7a9215d665c0ea46b4a5ab60c851b0d2fe06bced53a6893d805e2fbdb8d32c +size 13048 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b71085fd91beb1631730edcb9e51fb52f57990f0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48198202642a33e1d97bee53d31cc633d6eb5fe2c22f936b24059b820364a645 +size 33909 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41b33e9e9479f6670fc3daf801aa41242997108a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694f6f8ef4f2a0d6c9dda02331912a0cd70a52445198c9c9acfe33ec9a81b29e +size 40414 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33ed2dc60b2e0ec776359803562f1a68aa78ae69 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e3f7187c5cdbcfc690a2e71e228315811771356a819b4f151c1cc2acc5a4bb +size 56001 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc47e33b15be424a3d4dab186cf4f4be09759115 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5efbc3b3132d10de55a7864312309fec80932a449f1b1bbccf47730d02985d +size 27239 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d79984623c96bc374e312b7a18b1ca5a1aba4aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b14924ce473b821a02f7bbfe8b9d9a686069d9cc32a84412e3e6d13d21df7b +size 14793 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b509bb40eaf58696fbdc214c86a0124bea3644d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c8067bc275c9c6f998cb7e5686d4226d1d004a6d33f3a7d9d7ba761e5460e29 +size 46501 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7721a6e8a1c50207f0a97a3918dfcf82c2f5bfb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a764ea72454394903ce3333846327c10972f87784b6d5666019124d66720dfd4 +size 52980 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa087840f781d84dbe3def862671d0c8f73fbec5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e3676b8f202a5a323bab3d84f2750156565619f6528cd4b34875da51430d03 +size 74434 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d44d536f51a3b4bee0b0c7c952f2f82ccab27a8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbeb7fc29b3f6fcf0714ab90559f5b68c096192afd8bf5428602f70bc59e6666 +size 34878 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46a42e50d42f8c92a5a0452af737316f782eb8f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53243b201c335ace2f42367be51024d35101a450657fadbe2476144429876904 +size 38930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7639446e230d6452f2d13f90c9fa7f9c50cadda --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf16d62306b9d60038970e06a4567a12ae30d0921fd5b6220e1b9344fca506fb +size 75778 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_087/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b4497d0c0a8e55665d43d44486e850dbd87d2e5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c143dc5cd2ff020565242c403371bd419a78d9918dadd3987d49cbd51b0078 +size 21017 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..789f0cc99cdbf7ed8b95337e18d2c278e49143ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439d94d1a37a6a9a1de8afd09985a694ed0532140a0ef833f1c55c6382a1a3a6 +size 16421 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9383451ba52cf4eadbc48275d9af7ed07c6b242b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b72853d82903d58b12665e99f4ca753094f82896b245fc9509196477bdb0f77d +size 74481 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d91cf5a9682d196e0234169a9b5614479685866 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b58ab3e37aeaab1a2ae9e9b3b7464a06c4b21fdefc1a118f137e22c02fa090 +size 26451 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c7b6d8a77efe3330b669e2a9940e03958ed9569 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac4a7492edc73642179cb94ade5504d793c87d5535ec51c8b728dae344cd5cdc +size 40750 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..806697e6909995274d0acb70c5bbfe15bd5886a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3342fc5338c8e79bd42e92b2fb078288a4958f2f815c4cfdcee25c04627c2e +size 53654 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46a79dae7aa0b77cebaf3b2d68c52724559e2d69 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b896f443de12e1572f6f493896017fdd44864dfaf1d79eefb0b8572fee19512d +size 63154 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..923ab5130546bc5abfb3c155291e89e3e98fa74f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:857e2fa84df8d1faf5226e676ed9aebe4436e1ed9aec001c78bc2b034b7c98ac +size 77277 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3acbf83a3459d560aab7d524c4bfa83cc15c19e0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2b0b6a44b8e70b29d38a36907c5268b4f17b98b0879c3bec7c6f72b7c585b25 +size 37768 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a265d518a998eca26266120dc2c2f01effdb7fe2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48be6f7593a71dc1a017dfb2c8975b183a62922e61e4ae3d229206637c05fe0c +size 48279 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2021140bdac7ea3a6f6ff296d79c95d42a563ada --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45221753da079d6c05a5cb4bc31a02014b598e62cdf43cd5c8662de887e1b574 +size 69585 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b1b8a8d29d2138feba02ccacfd92e64bc0e0965 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb74c4fd16c18bd351ab4ec421fa11e83ae7a358c4d2fc15c2534cda15b78b92 +size 32495 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a7c061b89a2bd54e3f09b60be3b3ac1c685949f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92b5be5caaa4725e5df8397e591e9d831931202ff77e0f525fa44af66498621 +size 36272 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05ffd70b31604704ab6d5c9659a5c7428d17c651 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9fa27ae8e576cbd974937b7d4120adb62d0332e57c208eb6d935130e551c054 +size 32023 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..585e1583b2b2d97e6e8167a66e52b0f1fee7a084 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b2541065ca836f0acf47bc18a05c179c3063edf297451599fd0995ed20864c +size 18938 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50fb5be8c20ca59080262ed842cccf6d8f64f933 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b237fcdda7b552436d4f918cfe00f52ae3882967b606c634ee38ab1f4078699 +size 20933 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da502e4cdc58288c9cf80e5eae96f57f985ebe67 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1960a2c0aef2867d7d3ce96581b619bab5db0684152ce5ee4f26fd0aad05353e +size 13601 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0aa222021461d9b3676e4760b27caf843748730 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a26641025231be020871dbcfb0c2a502bd0cc1e47c6124f6d1e12133b44dfe +size 34160 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ced9dc0c4bc785884683efbce45a602b2fb930ae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6088dcd25eb7a25d97f24c57945c28e2a21f1e062c19971ad2c54dfb1c5b0fd4 +size 41558 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10b5b133f6c9582c312794b430ddea9fc1d4a73c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94441e72184cdf91b656d36d9ab71efe45bd93c8155f3449617d43285f925b86 +size 56727 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0c32b80825359f622e22b373631999ab8452130 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36774b7a61fce8cd69345d54ede340e110afe4f4490f89307570e031d7037af4 +size 27062 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..901241fd0a29c931d7d8ccfcef767da112582e86 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ccdabcd684b908b5729c6ee86396f3a6ffdbe6f539222c7b6de923b925117e +size 14935 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e0dd7e9980bf031aa500e986b8285f11f120e90 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85871517c888f65b197f6d7865d60e79e1933fd22b16b2fe776f599d451eb01b +size 45974 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2b6c06cf9caf2f8659878dcd887c9c57e93aaba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5fcad550d84beebf951015033d339afd5fd64783c95e86ddf11ac367c7f0b3a +size 52461 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed6aa74fc30085490331605456524fee9b69affc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7dda4c7b5dbb7df619c21800648f590a29d7304bb26f86dc4f4b9689ff83183 +size 69556 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbe70fc88f8cca67ead982b7df8aabfd3f71f2a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d562d8ada3b6da4b86615d989c6f954ffb9f16c4086587eb0e57787d9aac2e58 +size 34803 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a527450ee5f1b338a3988f2979099618206a7e62 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75721cdccbe3cebd5c16a9d805bebe4c0622e9efad10b1470da57b198d50fc85 +size 38811 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77ff9c0585e48496a4c0d1686aeb8e47c7f62cad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7530642388ba7b8d22546493119c7b67ad6cc4346b73ac08f83bd3250bb8555a +size 77131 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_090/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2e7dea49b5481b4ff82f2a39d4036b24e8d8c8f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c5d089d1c8461e976d1f7b9e009f21b6d16f5d7a0dc18cc2bd1d106eeb8a5d +size 20948 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffe468d5f783d85afb864f58e543d5c621118ff9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47b998e3e7b3797d05e33a93483eef6b73ad808f43efc4d7f0a4bee933cf0909 +size 16326 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae8398eef50c0585e80cc9bbf18ff76969145c69 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70d701127754ccec12f357eea5803278f439381319b19787a1b3cdbd1c40e5c5 +size 75105 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ef70e956285445cb13a51e50799161e884db72c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93e1e3ece7f907e76fa741c07f4d81f7d16e34a5e745b79d2fe4b7a3f9452a2 +size 26758 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd6e0e66fa60338f3977f2d09ccdbbd37e6b7300 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d33f9dccf36b8dcdf0a54d200e2ea4aaf72a8fa4bc8ae8daa3dc8d8afa4698 +size 44872 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08555d51644a0fbd5e777c61e39ae555806785cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ace1b3472b27abd3b6b59e28cc823e2688b16857fffb12302252b19afc498bcc +size 49214 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bbc229cff9992f2b68b5f2d0365d61792c688f6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe31e37b2b6bee2a1122497fe6ce769e903101d429729ba81dc89604b893989 +size 62743 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51ac481f1b04f90129603b6c70b28c756849757f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ff92d083ab5e281ebd95b45f34c0a67b3fbc6bb06a557e410e42c959a5714c3 +size 75102 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..145b20caee301247afd2aceccf57279c4fb1e262 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de9379b8fdfafba6f5ccb69f5f3ba8deff1a5a8cb27ff19445146651ab027275 +size 37042 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7632edd9a1ffaf9e5919ea98d1f79450807fdfdf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4db61af855a0d3e893a4734c53b6780879ed60e9a5c2c996e8d614904d251b00 +size 47997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6faf8b0f10a5e8d10316339692a1a1f422b63c15 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aaeddcdde92a2b1a885e9386636e28f514cad7090ebbdfd8a37765e0e6d9792 +size 73855 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ae548e8c538af6d726d81d9460d33820ed0b8e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75db0cc510004271c5e09c112938e8df07ae2c90b3e098f1d5c4b84e62f328d4 +size 33146 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72e27706fc4060a465457fb25eacb1914d99677e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ae9b8b5b99d8363f83d9e5729fa147fac39ed9fc18d34045c6fbeecbd2f3f0 +size 36676 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..622eb4961780fe49290bec3cb695620922c869f9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48703cd17d12d838b2b4ce5b9dd67ba578bfa5cce2a220ff8970b32aae9d1a21 +size 32461 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5fd242e5af924e71e6b7cc6cc695df5941d6b87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:069371b38db405da16d95b3590d16b65be0062ace9198ea59f8144d39844371e +size 18666 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d74d7fc0ce435e36d4e5cc3509bbfee94973eca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c07eaf77873ce78c25b6d89dca1f706f5107b4ada762d65fe361ee534df0a6c +size 20934 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf7e380be0076b0c1a69d800367a091d92b7059a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f1772bc23421bbd3132a439adff1553c2abb7ad581bd4a50a1a42c1be57760 +size 12947 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30e76c91cbebaaab8afb4dd58195d555778866e6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db371d44e388e690907ae81b54365fb4926117b82cc5b0302f292070b54efbc +size 35316 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3389455273f3f2d313e7656baec6335f50f150c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53066907251a9bd8f1218a86c84da54908af01d3a68a2a2fcd582a61de221e4e +size 40927 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e415407d01fd639ac412671d5e61c3b83f52dc9c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd0054e128232e305a03cc37424bef9388e6d4ebeba26023891f00c8dcf69b1 +size 57416 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49f5b162bf2ae1a677010048e6d73f469e17a3cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b754e20b939b976b9e610347ba799ed24146608eee3ae8101c1c53dfc73d5e01 +size 26961 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf5cee58122079d0c6700e7d5ba139527d130cb6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9badbca429002fd01d9bc147454fba1495dc140b49ed0579ed0c84f4149c73 +size 14750 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e93a898600fc846e0af5587825652ac841f592ba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4e1f843c000d38f473c53a7a541918604395d3f1f3ac678a8d6070432295be +size 45968 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d80df981fb0c23f25dca4191bdd03f901da03d90 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c92740da7ee314cfc6c8e032521a38e62aeec659415cf9b8d92699ad393230d +size 54742 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f23313b0bf7de347e111e04a98ec668a29df8dcc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5787916278a59d7223812814174d476ae6c482293f0835a59683ed38816a3b48 +size 74871 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a22071f97d56af9bfee28fbdce9ecdebc9acb4fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1d494d418e3e961f01900b0a1a4d9aeb133b71ca98a632eda401995e2743e80 +size 34912 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68384713878d7b3fc4bdc582a0e883f7cb4f8862 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2caab6de300e6c7a379e5b97324fdbb507109738720ebc68b737e8c84e46cf8c +size 39171 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b23f1fc05a046ccdba16d57fe82f8bf7dc661e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d8cb59fb0c87e3b18134e1f82931eaf7014c4c5c26efb381106b276954ae39 +size 75926 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_093/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec144124ca5d0f02f6fd7b8b43dc2ab306ae8cef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35cf79a37edc8e8bf64772fb0a43d20298c52fb954b2f68e2c91f7f596faf7f8 +size 20977 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13340b86b655e520695688905498659496ca8b3f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39889c6049c46af53f0a5912344ff6451a096df23998274e7847b8acd519a0e5 +size 17077 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d732dc218ea47350be6b6488036ea9c4cf5ff6ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce668162dbde38ed78f89f9e54739bff69ab975d33afdca79642992fd8be1ed9 +size 75095 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c57f0c19edb0fa4f2843d41d469741f4fea6b483 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7fa9ed972eba6a049a78ffb5b7150fff3b55409c223c2b8f06d420e68d0cdd +size 26772 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2e484daac661a5812578966793a796f5581bc34 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1479c5c974e07cf7faa9164041fd226c87fb613e786ba1b87a0ed55988a05eb +size 42325 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0502d4e28152ffec1fa4e3ffcf7b1df11d224c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04bea0292759afc5f5a14ef83b8950f91c74b888fc40ed9117ceb92812b4625f +size 44204 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5158d344f27ae4cb887238760665fe1040a30743 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bca7ebb6554c8a0aadb4285e2b459ac1a9d93c0c73656eed8d7e7bf628a9699 +size 63212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6ad8721cb017382643b21717a420932067ed398 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfebcef02d15f05dcea24adc7df183885a4718b3931cc2371c465c92ba3e4bf0 +size 76227 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b373801eeeeb2763f304e4307d9627f7d2d72fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87653e26b787c5a32e1e5cb816a3b8b75cca121d8fddf5c4f733a4c6f0c85f65 +size 37087 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..477a6f1aaf48127750e112c9aed427ec982564cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e426e11a38cff48b87044bb5aad08d9ccb7a3876f308a25649bb6083fa617a +size 47417 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60b9b595663bb0a5e7526de716d48a9a9967ee16 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4910f0d8ced4e6e0b80f6d098035ee86086e0406add6f24d1316b719975bc127 +size 66976 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..779905993e2d198dcaecba388a42b9522f054674 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36bda2fad099e9f02ddec7d6a21cb6e024093885758ca4acb7a426ce5eefc60 +size 33072 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cbdc137d847440c56784011c6380201be26b918 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ea3698b6fe257e9ecd5ca9a45a6fb2c38f913d8e763664447cb34de699a66a +size 37085 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ed6da7cd00bd0147f8e7afd4afedd2214a10cf8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f737c7decb26742ce3f2b9a0cb0b06d821db9ccbc7263f0120d21882e806d5 +size 33353 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a8851915133ff6487e2edcb99c6a2a6aa0efedc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bde6a25000b413f2f8cda42b4693e0cd595943fc708194355718b6c154d8fb3c +size 18973 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ad05db66a39284b35c688071ab15b4bcbdf0159 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b21ba938836264c239661a841efc1bce30c5eb3a5ae6eca1a87c4d41e5c53a9 +size 20926 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5bcd68c35605075c35a74cf4127d15ec863fb4b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7329709a3d17fca766365e3365ebb602fb97ee24c64d7a4e85a28122198f4961 +size 13146 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b00c8f8b4364ce0be6f2132237c26853817fc42 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa3088664867a14301e22ff5ac3c440029968c74688007885209888a5c8bbd88 +size 34865 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa7f7212830cf2ea8a10ecb92cd196d4c399c960 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc558b3ddb21027ac2d90521bce79de4aaa25cd33a8be6ea938f5dda07d069fc +size 40698 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66f48f9232e82c6e8a4bccf9d7c8a28fd4e99d1b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:460e2b0cb7a2b71ec75e1e56361ca4abc2efdbdb38d2c74d75bfa265c209570a +size 55652 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e45092c28fa67c95aaf70cdf128e359dbf0ae0d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdbc7f3d58e4c27fa3d67aae6b5511a70b6768b7bf25c7340f21cca3af6f7e3c +size 27430 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..538cc2e6fd5676ac8dd28954d31446e311a381ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce2eaf518f317df30330586a7c10af57b06461e1cab552b3f43ee0741f4dd2e +size 14860 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8a46df471fc9848ed59bb2f13f4a118c61a32c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c64a7e9dd00d8fda9f32f1e0af559e2066a39e52c23b39502dfb979a4f6f0b09 +size 46294 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7cd0d49a81ddf18fb3a8279f3fd856d9ce93b7d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e470149beb8d7ec11eee7ce3a060a1c6be5921714ccb1405e603fb601c14d02 +size 53318 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc14d274a2cfc6486367bc023c6c086ba7f83971 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1580f418e007af6c16b7f8576224cb418fc45f0d4873bd4e729915d47427bbb +size 71354 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e65dead1fca80973bfe3f3777ed90642407464f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2770bd850c5b77aec0a226b2d5c1a80490ab2ee91d758c4962206242d93c9f0 +size 34868 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d581e78ca6c9732772a5740a76e1bd29d0d26ba6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff9e56cae3460671ddf6f17d9af0af5f46a9372bd301c37d45310ebbf871f80 +size 39161 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..745f5e2d920372589f0d60bfb2ba8a8867accab1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18fa1b935787889ce96f598edec811edbaa876fa7aa01b3a842fada919f88ffa +size 75753 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_096/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c6f7b43c79d6417301e5b79d163ba89f4d2e588 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:964b080b067f31f6fb1093140eb0b03811bef27ffddb5c52d037b9f971d68844 +size 20985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20b16156618f773f5d4b48e508706f12f69b5b74 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95c36c20419725583c489091aa90c699dfe030ebaa5bf369df7957100267c756 +size 17334 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0d4e679564dba420172094ed0b3476ab6e6a01c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808264a04f8a08ecb2d88eaadb19db8da867c56063c3b2e810f66dde4f2a64eb +size 74677 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f924173537bde3f8c8c5de4f49dc952ebc3f4438 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82955abc3d8587add112a6e89c3f51aed89aaee379e150879ff6f1492d2836c +size 26486 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b091b41fc15a7a91d2234bc4a6ff6497fa6acfb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52608f4ba68a5dab4fe3bc93e170ac4a7f070c3e355a26ddeb5d02ba5258d39b +size 44559 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24a70ef52a693562aad11c2e90c18d9482ca5604 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6e7f3c2da664c7a26671bc44fa2bcae1b6b9d631c3cbb2b6088915074890e4 +size 50897 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00011fae7d5bcc96352c4e0c7f9e92b5cea8a505 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78103119372464c2ea174de86e4f2424286b682a42a0eda957f5f8330c7cb9dd +size 63558 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19d3a192fcc538116f190c45a3fae27464d1ddbb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418869dbfb5f93708ff73d68b92844e62369f8d597a1192501af6d3e65f86303 +size 72527 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c8695f8b22bc6a55b07d38dad4ce5b5819325be --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6620031bcf5a9ac7a182b27ff12ae7dad64ff3d99ae794148894ea7d2ba519a3 +size 37352 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6633cb68317662d939bdea4162101deee81135f8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93fbc2f6edc3e40c3461723793b71055f6ac76fbfe558ba9103eaad79721a96 +size 46174 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2d155b925f62d5a3f3d8b5b17b8e981da57e1c6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157b9b182fbe9486aa01fbf16f710b1b7eebc6f0298e14fd22f22c13af00bd46 +size 60393 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..760f18a2f1ec92e9a671371e8377a796426580b8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d2804f6c4e92e6eb11fb4285eaea598c2ef400ce84007890eb303329d812bb +size 33262 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a60b556b59d43e2350701376bda769fee779bdd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad80a45c073b79382df9e86cbd1647fa89d8e2f428da8f323242c0a6ef6fc0cf +size 36363 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62a23c201701107232429b9e4bf52a6da9822920 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b5669a3503e7676bb376c139fa22954f4e2c669e9047196c6e203e4892f1c2 +size 32992 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c462ff5385d78ef1ca16e392c4df2dbbeeb4731 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1c25a4a6dcd2214300637d70e8a8815a18c3b4fb15379b59e5f9a37f198989 +size 19218 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff86dc1ac55371feafd7aaa36ea6927bb1eba3ed --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adcf81dc70a55bd4b1e45b1e0eef2a98224a49d217e585d16476bba421be71e +size 20897 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c99a7efff11f11723ec58d72ef03b9d35681eea8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26abb878a4bc90cd5d62d89b5acbf80f0cb540f3cd7249c285adb1f44d9e0fa9 +size 13295 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ca117367006d815ef2a748cb68de1c612b94557 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c42eab4244b40086a83484d2d69acbd3d225dc11dcfe1ed4472b0f5306e1c4 +size 33914 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e2b5a33891b86d72388d44f460e8857983a9d30 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53f90f484b9404438eb9f0015a4420e69d63c437d7f50b179041e8f955d5133a +size 40712 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75e3afb849250ff59bd2638242f8e8fe2207d15a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f87be233a47321f2c20114cb1750f8706941372e1f72c212aeb7c3e15b96f3ae +size 58800 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a705c073fae0e0acb86cc14ccb5628bf4644f3a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f91263a303eaeafd4df7a00311f3b4da32fb8a648f5bcd5756943701444d5413 +size 26608 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5952dbe51ce9da293ed832a032a0ee3b2ed14e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33cb24c5e4ffb79616f0499f52a6751032513a977c7ae4423f782c1be4303abd +size 14777 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c4cbc219bb63777f6c708fe958ef799ea07a399 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c496e5324ff645c20fa827b830b9a9b3b17e73c1843ced5ccdac4f21b9c6e3ca +size 46596 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..444dae7b6a56836848dab8c7f80766b58f3860bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fafec4918be3efe46564616cb88f56996e4d9c1bd6a7d106d9da481830ba1d60 +size 54143 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71ae7965857f537733fbd49593caa3a5db7abaeb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efd3e0fef0aa147b6932705a76a6896070ef177018ed3d5fc30d898c50f947d6 +size 72851 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d63e59c1f6358b10ee0a1c98e8317bce9f7e7c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49b12d0a376eaf70ed5a91f736932f4063b27cfe1d9400d56dede8c1f607e3e +size 34820 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25d9ca76566315683cf095bf0f19dca21f83969a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea001881405d58404cd1906a0a19be04746fc056a2a9d1f31dcbab3b38e843f4 +size 38923 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a52b1536188a038cbc8142024426ffb25414ac7d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5dafb8eef1bd0c8c9a2f35139578a6ac04c7e4b6630b24ee38075d4d308980e +size 76762 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_099/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a890f3023a04d827e50f3e8ac02c5cb5667a0a6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ff33ea170bcc0b0bacdffaafe3c15f11c1eb354530a736980f05c8220df087 +size 21022 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d7f7697253e8974f36ec3d0574fa4ebfaab2809 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde0181e8491310a5684df00575da978d29ab5701b9006a9c0ed73066af56ebf +size 17466 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d5f7ee96772d8dacf7137ecad585b815931bfcb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c521b4c9691aa65d3160aa000bed58d64575f815c0afe03c5d47d2e1ea3992a0 +size 76167 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0ccccd56d8fe1dfe300ef70c252c28081bdf79b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501e60e6f242f96e34ee62d3a0a4d1011d662649ea2c963a90d87e749ddaaf30 +size 27285 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee5a0378758ae0bf485fdfa137412b7e2bc15ccb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8a6d0736ddbe45958cf9aa99124a36b186839da26936974757b56adde382ac3 +size 44702 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f118f98209e75fc7f92948bd4276ab8bc55a3246 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7390976a0145b1600ee608d2203ee4645b5c6c904f1419c7f7c06367e275394b +size 51341 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e55fb2f1da8cadf3824a4d585b600de7a48b711f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64ae83f26514cac2383522a77d3a565a2b0449eb6b8e06a59eef837eb12b747 +size 63296 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86781884fad7e44f339239677ed2bb4b76bcc6b5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0621b597289d93382b210af6c35fdc77cb60d0e070177afbbaddf6c759b269 +size 70319 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c7d9eb6622de931529ead55e764919bf9daf909 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f82838e19759847ee4523dcb189d71cb727433d2b22712acc7829cf0e7976e2c +size 36758 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02508a340ccd68e50b88589388d1a09e948c3db9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af2596b5cb8bfa0f95a0eb60575d92c0c8d434eec38bc10eecb29fefaa0787a8 +size 47927 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79af8112faa5e2124292cb4474f7ba0007946da0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39857d3e1c53c297e5bbcad5f7977ffb4a30b0c0d30da32f53c76943b91a652 +size 72798 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af5c6bbd4fe7f56263ab901be4c52c6b39f31811 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4cc69a63d1e04e6656ec8a7c09861b7d5161bd5a55486edb255ad75ac8c3182 +size 33365 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de0524e47d95e665d47168dedf8250f78b757250 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e54a2ede5f0a1ad8d7624c8b7247dc418add5cacbccf51b617b5593d65f6d9ea +size 36838 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..489dd110367c181c390da94ddbd4a76f03ce4532 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb88494e28a3e51d1daea2e0fb362e7907bb1fcc797920a732b31b6916c055a1 +size 32927 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf3826c0ae9cf2ab5c955d50b17c49e9dc44a845 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a7288ec9ff19a7d0d94f308a7533f5c8582324582a5b9aa7a9b2c2e97ffec0 +size 18607 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c0642dfae573f7ccae66821ddaabbf3d747fcc5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd77363f3b1816b14057847cbe760e353026fea9a36d0d94a95f0e9d7fbc47f +size 20918 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eee25d93da557ca927b10325d637ade0a79460bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14493bd5aedc14d487cc2f552ff4daadf74168c8dad6448610c3cb183e8832a3 +size 13793 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4623961fdbdfa82785d05ad222e6faefa5f7eac9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81cf903ba2afbf4545de74c70fd5ebfba350569002cb72a2d6fec1bc52d3236f +size 33656 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..224211a542892c47e9a8f9997ce792447042e740 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88737f0e8614c782dd8ebc3dac987a23a31e4727ea96034421cb1dcbb2f97292 +size 41576 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ddc7088092d4f24e3d41da2983c6512c111aa12 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6945c50f8059d624338fe262b500b73946eb9a27b621f22ee79662dda9270cb +size 56743 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5fa32fa9b2ac920079f217c72b186a1cf85a91f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1160d9b9919011ef8a98f05fdbfbf8e8386cc5016aed87c24b7f4f7be023970b +size 26862 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..271183495d8b3a65948e08595b5e0df938b0a05b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c4b9d26eaed04ebebe268300a84e4c9c9a1cc15be8715a5268e14e339eb9e6 +size 14710 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a02490f17882a31182b9e7978944e1d501ef670 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e96199a7b88bbadd72702ea2d66679d221a9b3640f257cc8f1b3a924cfb1078 +size 46131 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5abe3f71b252ec534850fbf7de82c850ff9f8c2c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ace90cfa431f8559be48a0a1bbac08b97af26e05943576d655a6acd9e3e048 +size 53579 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05295a2a002288269ae2d2ddf436bee71eed834d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26a292d2d69e5aa705726f6800d86f2176583bda6b6a4e324f030e2315b2637 +size 73409 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e05b6edebeb0f29626eea87d1d2bb2c30a97070c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea4b0e72588ba32379c1d85fe73debf464ad7ed00464924c159536858b908d3 +size 34956 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a62803315c7e5df8035e495387f63c43514e84e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5cb4e7e0361387485a396123b43aed19bc709a7b2d403601c081a9902d13e14 +size 38998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad18f6c530fb3c1efe8fdda852596a1b61579b00 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf67844ce520d92d4f33b8033428514620135ff100cea8a19e76a94edb5e40e +size 76181 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_102/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c38b055752b90afcf636c8089941865a8d214be --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5948e59c3f21f051df8304063e3a5e44e7b0bce2ddfc2a6ba6b8a85aeff9da33 +size 21003 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f9e91745d8278bfb4a05aa701452044a9dd693c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fcea4bb62b6feafe92a0e68f6a3561a033a1e1a1fae9d484dec6b63014769bb +size 17602 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc74d5c1237fb15a0672e681cb2598ca3cc15247 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eca16c73d725de62619799296a184b1d60a589362350c9d553f467583145db2 +size 76173 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c49b42f65129674961246b8f9e6b6686ecd50a56 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d028fc918ac2468520fa4085029cb2b199178b73412e58da29c542e643975f1 +size 26929 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27d034982aa71652ccc101608911e5d8b090e5d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b64fb01223b020a680b73a12387ae380c3b25c926ad6f45ecac12025fa93ab0 +size 44930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f48adb84d8a67ac1b5df27356e3758331ac5ba9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:691cb1aa63523ed41e69f05f0b3df833001722a22d109991a30aa9eb334dce0a +size 51558 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e3b9d7310ff53bd6b8d44773742f0b7254d028b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f78f4bf76e4bba86f881e19f6b694ba90ed520e23585489da86b51e603336f0 +size 63195 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fc9e7d79ea16a77f3028307a3345cc84de6b1f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b919f5690acf0240bf39ed59fa930f688441ff6eee57222dcfebf2319522cbad +size 72380 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f50a3387c854a382c89d24201358e2a2cb121631 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540a9d2dc0a3a42b1346a436b4c75200373a3a0733d86042bdaa1956de5da2f6 +size 36964 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d0077005be747be08948a38cd11876d8b5cd02f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba9a829edbc1740c92de7e97e41e5c0939c2b1c3dbfc07623342770748a27e8 +size 53140 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb42e33c6a91d5f3a538ae37b45098763dd4bfd3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc11617edcc7ab5edf5bfd8ce95c12d55661500110f115355c509f5448b90ebf +size 80605 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a46a8e93cce7af58f7b8017909aa8850cb73f50 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4aa95bea283985692db40a802690cdf54fb907a2e06f8aac540d6f34ddbca4 +size 32994 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..524c2484ebf388c62dea726e82c3942ece941afb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21401041203210547fc63966339455e8da607d505487989cedc8d7c503c50e +size 37179 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d20e0836f2266ee793d1601d69f1cd0cb389241f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d795ff42ae059d1435a1c45d3b6006b96d47a503008a3dd2bc6a3b2d91901aa0 +size 33816 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94789dc4a86178a924d3e7dfe7241a8046e305c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d01251fee5dd6d39119bae51e8491da30b8cdfe23c90a989c4cddbf698f87723 +size 19428 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d66590a5c61fe316cdb1cee477c54bd2f0cd9b2f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ab9961ccb3f0bfd09dc012d3e85c7e8e70544a48be7e6b490d1cd5b6636d26 +size 20905 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d8b572d942e3081003f9d4050da077a641ddac4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e357d4f6d180ec0c69b3ff550c068514bf15e6e7cc4efcf5abe1f9ab4378e71 +size 13464 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4513f509781c4a2f5c817c8fedd137001da8cab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032496ab5235b444bcc43c12a36bc18be278d8bda54f1603f1548f9bc8cdb2a2 +size 33826 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..028d545016afef56fc80e560314952cbe929e4fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05e8d72191ee8ce7adde26fd23e86e5df28d060618b8a5e6aeb7d95ebdc161ac +size 40414 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4fe97ba3ed57f08a2ee9c8cf13c6581092a5422 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa83a82ab725e37f3262cb0737f9f3e74cccbe9b09eb7402036ca48968c03d92 +size 59301 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e72d06893293a8845b3fd477c012ddb82945dfb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3deb78ebfbf98d44ddfd4d1ebcced7e4c9911caff9f3b600d07a81d5c30fea98 +size 27729 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6e584096ba23843a600d6e900817a9ecbe019de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a538983a9c0114c70fd397f5a70c296d24da0b02f3d8d9c796639c06ab2821fb +size 14832 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49ab100c78d1740dd3184e8d8702fa8d997bd7ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6217fe04da4a6b506b3deedd6e50aaa67b7e52d8340f5dc21a58ec1134cc43c0 +size 45280 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ad5b116c786338ca09a3264b1900fee2850e3eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d03ce842bc99e775bd95326d98b511d14684d465dfb214f7467482719abe66 +size 53848 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed19858773a35ee74c089ecfe600d8fe1bcdde49 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4742c2ee5fda4d8643f14ef7262d26187906686695f0f03707b2e1722fc16efa +size 73199 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cca955bb99c153b3204ff6fe4cbb8650282c702 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad1d243389b0219e72fb25a899c2d1960df520c5ab84fcae9bf9bd8279bea262 +size 34793 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8b99c7d410826898f868d72646834dd5f081c44 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c642ed605ba00074d0ec5bffa25ded4672e4babc43b2d773564f7679de0331d4 +size 39080 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..449d58bd459ca1ea250342f9bd295d2a2ea92e26 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b942220e9d99831e21651124d5df73a29749f919826733068c76e74be5200b88 +size 75117 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_105/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a45028f47e7cc2d7d7ddde6a3eac41cc9c7ea483 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba892b08cc1734a2262226e79c0d9dbe9bca6f216d4bb19d74a313dd46a7913 +size 20998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6ea0112d15ce5e117f9893f40008fcf18868285 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e378f9b7f85f8b9a1f2037f9f4745d25625f478243a6e2501c1cb2f6593a29fd +size 17463 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d2ac0140f6a06cfa0472660eb5888447c34b5eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd723d6703a593f0fd944f751a056bad777b531aec4cb3748479f0101b9a6e8 +size 75373 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9109b9cc19c2149b67c143fabb1bf11e293ef8f4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b02fe3e74afeb6b673cffd95cc837c51835da9df28d309a8055d0c770fcd7b4 +size 26953 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2986f04dab90cad4bf5641a85741d62dcf32d37 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:113eb629baec93cfa677aa87edd927adef50b8480a4f9b0f7502e03e8435329e +size 45373 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcaf7a522a55a531842a4c00d22b7e751c8e4960 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8e43dfc1202dad0a1fd5e51cd9723f6788f0ed255a693840ee13bd37f62583b +size 48514 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..908d390a11eb29c83d0a5e27a56f9c391d282c0e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2049b7e52ed68440bb23315b082620608e47763d6d38912d7eaac41db672c9c +size 63179 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2698a0e0f500b0e747a19646f3f11195f97ce006 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ebdaeae9bf7697f1c4f2f444052880125e55fa28ba0dae47f7283b2a541e040 +size 78592 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..437d8c1950157442fd84942ade0d43c4cdbba06c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a041fe1b9546ccc16b94936f1d4f4e7d752cc7d65106a5ecbf42d02fd0890a65 +size 37149 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed26938da6427c875c1e7257df0c2edf10d05eeb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46fb002982913aa1e5f7a177f616474139616cae13fff6dd7e8040a152c50b0c +size 48551 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50424f6d1368a4cb2d6c432c6107b1ed47d5eb72 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757df1a3c0bfa88372999549f03e83f32116401e5533540684acaa94318d4a75 +size 69141 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6dc0e8f5fefb64dd90e7746f1f338b483c456212 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5f4a275142fd0bd49ee8a72af58b73189189f8326809626d79169e7deec467 +size 33384 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0aec719aa5aa42d9cb9bbb75f636d200b97009b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec30312a104f86528cdf4dc28df023ea37adca5a31676632e7cf424dc40b515f +size 36118 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..679cc41b65eaea102a54ea3c517aa01260eefca4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b55100ba3209b204d2d16fd302050c6d6ce4923ec4ab25e9a7ae29e68c36daa3 +size 32896 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c7d17cc7fd14a179df667f1ec8221a34d3ff7e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1717d759225b682c7d37f95305e8cb8045bf111e0215e5f789b54982a274b1 +size 18997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..845a93189163d0cb1fb8e221958f00293435e520 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bbec0118755cb98255b6f2750a26f1cc4f665b2d36b1cbea74923e50e9e78df +size 20876 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06075193c64ace78710adb70e9c3087e3d09319e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:831ad92794af50aff94e09044efd8ee7c933f69a6fdf97ef33496735096385ea +size 13075 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79b777c853dacf3421d247d70b539c1e8415db15 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbf30d5ea6e63b9bdc1e7247fcaab51693324338ac704e309e9bf5600d056d7 +size 33623 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5982af2a7f6533d90d30704019be881718717a3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0655eaf6c35e051781ff26177f51e8e74db9dbe6afeb47b4184594a2daac960 +size 41038 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6eaa2c8eaaf8941b1fbc76aafbf50f56ffcb6c4a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfa2c97c2ed961719ae58407f00313306496daaf80f1094cb0c21dff6b01f60 +size 56405 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2de6f12156b9ac48bc8a9f9a51106a4e7890583c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968a6bd8c25dbd9e79d31ea0aed85681fb9874b9bbde8f39ec91281f0044dbd2 +size 28131 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d065f66eceac67dc2ee3534b04757975e23df70 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0fc61dec6a07de3480afd1cd044518a6a929a577d18017307304b53e8744a8d +size 14867 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53039b5659a50a445d484025f5ca3d7fe7469bca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c08a513dc6b161cc5a99234f0fcd2eac94b4704b7dd2ce5e5089adc72bc61aa +size 45294 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a758753f547f7a524c7548ea02f081b5bab2ea07 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd74691c82be19f52bf2fc3985e3d14bd775f8cdbc9c0a645780b1829e3d5b3 +size 53766 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54c9e5ad53010b5fab3ec23ecda8c3689e528c8c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2ecc5a37630d42cf195623d39f05124c71f54f0dda1a282f98cde6a1d2fda2 +size 72711 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8d4d00ab12d6a185ed229704bde28b101574533 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8740bac4f6c6cebaa368b290140345df01afaeae105e2f41bc4a41be9486ba17 +size 34841 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c92d7263cf3c4727d7f4829e58bdf98c0b88c522 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32a7dd5a505daefaa16b40848a4829b08bd9b6ed7e08d4ea6743d522c3fc0150 +size 38634 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebf3787710da33e0a3b506058bb62ef36c06d8ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a122f719168b1de5dd2bed283344bf10db0d2ac78d7b1b9f20cbdc5287229f5 +size 76423 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_108/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6718f585b22d7ee96a55c853cdd15da0e7b58d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e11e2a026feefe65439468b99f438a4d1f164e94c01d5d410b4d4f1f9f7d7456 +size 20965 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d4f3c09550df0905a95df19de1f0102a975e5ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e84f1aa98fc4f577816a3c9a93269ca8151eaaeefecab87e4c846d28701664e4 +size 17223 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1deb17a655a5c006d8e9b53ab1cb2ddf1205ba26 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd3843ad7c6c66891cf8237e8b0e05dab0294ba8bedc7f637e42b5ee3595c8a +size 74858 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57ebd39617554e02be3faa291e58c37699a2a0ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1dc1f504c50175a3dc4d5f8c618496dc19348ef71e512c89c9e15a362deb763 +size 27291 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf63ba0f155002048667930c5a95765966f618a8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c3dc1086cf6712bdda2892d4316444be37620719910090a558a8276a463574 +size 43715 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ac9105edaaffc9bfb7f0399f825e75add824349 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dfbb320a448993e85d4730456d2f371a7ba40bbcbea8639dd0b151c6e3e43cd +size 47851 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1f53c6a1ea1721bf0f97d0f9814b466746da829 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254b32429af986d896e9c540ecb24674e46804ced73cd8b04400f188bfa297d5 +size 63176 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a3cdee4611aeba7e051dd1a48c83bc43323fec6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b50b67650fde1c144583f7e8a024dceaaab7eb26eb332f73a056d8756033ec19 +size 74713 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9d97130456f74363705fc89cd08e0b579b1c8e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7451939b65bdcda4d49e9281063ae2e025467ab683389833ccc1d9d0991dbe +size 37103 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2f44ddf96707f1df2e4c749a13f148adea711a8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d5904acb4a0752c3dc259923e37b660fd9c1a79544d67c6f38bbbd4c16a006 +size 47364 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d746858d2f70ab2af3bf912cb8ff175bbf422d48 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6987fc131b2d6a662c581f3a1b13b04d206ba89e072c83f5ada9520b4bb90ef1 +size 64766 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..973816a565d083ede55c09f400260d7e0bbc65ae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f34aa86aff21ea80c7b3c1bc76a4d75ae1dad3401ec654c847dae5e2c800a7b +size 33231 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d67883f8d82323cfbc8ff0d09366ae1e7a39ccf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712e267542cc96e646357163231eb6fa71831bada3a955fb049842a5ebc0dc57 +size 37130 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..681d744096635e3e4da3e492ddc7e8009f1b5417 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:601dcdcf7f46234560805acb50d0a49cc36da0285a25ef8cbcbbb07a7c66715c +size 33528 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1317f4c2baefac622cbf9d439220b79e065e7735 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d2299930ea274c19428bcd30543ff82ad14d802d42deec99bcb3f823137cfd +size 18845 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a53c8eac80863a29c8ec9351cdbc6eef87fd190 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:227e08e0dce45fa741e1eb0ab2073d6ec507e1dae879a49d0f9b801c00cf9bf7 +size 20917 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..399e9b1a53c7f0669a5de7fcaf424ffc6f8f2dd1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60105b61b7c8907514259efc89cec54ceb997e3f813b70c6faafcfcf3b45ff4c +size 13625 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd1755d8d7ead6e2b3c827d7488f4f25f8f31c54 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a45d137dc7a99c7a6e57fd74fdfc8b66bde5436a14b0565f2675b2054e4366 +size 35572 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a582e37af5848c7104ccad19d7fda74fb12e591b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6fcb6ade90a31cceb1c8066e4787ca17e480704338020f6270ea1788b719aaf +size 40984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..860c5e187e6e3d9fe0487001730b59b52602703c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71226aed88afb4f30af18d744016cc8a4e04c3da182bfbd6070cc7c393d500a +size 58539 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a0e7563662afa2a5eb141205a62e86413632677 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fa47759fb3f43f36d77d10c8c4dd5ce873aa57c79f84304f5c458f35e6e65a +size 27812 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26a76cf4bdf0040498c540b7cd9509d19fe84112 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6459ae2e37ed725deaa3775f798dca2997b5a078c091e9619204932776d079 +size 14677 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b3ac8027c2a1fe7a6050a1aebd2aca2337605b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d542c55eceaeae1f5b26833094ac9cbfe44f4615cd9c71ee946e7c9135d58bfb +size 45741 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47a3ddf055d36f14f0866c265270908684d49a43 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c6a648c6919a20d0db73ddb51bdb0d3f09814e64fd80d0a7bb2d3fdbd13f4c +size 53496 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e45d42124618f9eff4ff4ed2dbb0e26fcfc0882 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be22a0e44f692f7123afae4ad0fe0e59218f59a5c49cce2b9628681b2c80c060 +size 72209 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99fdd7725085c85ac4db866f5d0a476f41e38923 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd9f37046133099b957c4fdc6a2f50dfdb9fef952d7b6a6c9de54fb88242d7c +size 34643 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c33656cf3ffc763fef93db7da7b7f44c5cce9997 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4977605540916575adec4972fe658129db12454a0a28901bc4c0b7c62abfcc2 +size 38998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d28d318e0972672f3f3678d0a3b081f37e2c6843 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0853563605cf5d2ffc76f04fd43fe51ba994c453060b3a0100f2dd866e2a570e +size 76068 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_111/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a3c23224e7e278b3c74e00ebb23e9b70403b4a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59bf1d46b425bafb2786d12d439c99de3549b22c0bc831fb49839312b95a9d6f +size 20971 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da081c395e837b38aff465413247a00b8f00b979 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad6af98d86a754302aa7ad711a2e500294111decd38c94ac9a1939377517dc9 +size 18712 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..437d6a48d3f923aefd0f0835eee517d0e0239d7c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752fd8d223113ba157697dc0eaf6b57ea9ba31ffb4a6342ce2fd61bffd499d11 +size 74811 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d55fb15c4ba74ae10bc05059d05d683301a8aa48 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a98cd6cb1cd030bdcd3f7e4a27b028bf3ad2aaaee96fbb39d93b6212b30b2da +size 26532 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33fc3248f66f98f2aa8ef04ed3ab5e4f13bc4fde --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a02ca5916aa1c71d1e8a2110f88d2a2ac6b12bd955bbacab2c8215fa51518d04 +size 44057 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5baa848276acfbe48120d05611d7b7defe014c55 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb9d312995874f98d62916c19db613abda06a67f32f51c0b738e8d274fec2a5 +size 59742 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab3105683cd881800468e0a39cce602d216e1cec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf32413a49c7ebe40c92f8d27629cdb086f636da3aebf64917946b89f543a86 +size 63381 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..438690060983c7e928aabb55ff0589d2c00b508b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7abf7ac5390e64c58d214c541f828e7d434cc62987013be9ddbb88925b1c491c +size 80731 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..904b7994ab102cd2f40aeddeb0ab43e3cd2a6e69 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:100373dea619090ad1f1e76ecc8a30688b69a5a9880b912959821f15a6172385 +size 37667 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2538172a9a825651627bd3b555a76a3a4dceffef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8398d8bd7e6e5c0a2b183434d6f9421fb6cf37e137286c64eca6e6bb75acf746 +size 49436 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80691021f56d2d664488ca426e40a80ad78fe07b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d66824cc7a66ef4348400897adc56465c63af7f1e3decefec38d11cf92b9218f +size 68441 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e768788f6b53f7241489971972271a3ba537ff5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f30481f63cb3d17293eff995874bdae0e8e38e43d59426a8f9eed2dd9f1bcee +size 33080 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b5ce8d2bee4b9bfbc389c3af96b2634ce911d1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371a183d64abee9a2a8b8635a600e033feba82e11a8b74384f5cbb3a2cb6ee1b +size 36658 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fe51980d29aa3f3f79fe3425fc3ecbe3c95e6a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca108f3edb44b1dab17f6904b58e7cde599f61145b41d11f8e32abc85603f89 +size 33279 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd425e5d3d48d62f6fe53d56e8d22b5dc5a23e95 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19b1bbfa9bf157404bc5478505ba80b65326c61cb635e6f3dfbd1702c202c414 +size 18823 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85940beba8613ef7af7d70130739896019b27b5e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6834768b110299f23f4d3f941267647dafb484c3a947eea0c488e3ca66f10268 +size 20895 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57e9c272f2cbf85f76d3a848abbbdb417e77977f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1133921caec0f2a19bf1405af1f74f1501db4615f754687bbb51136b131248 +size 13817 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28f1932f8c78afbda9747d280ad35c1cacab83d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb48b1af842ad23611f15ae38c64458cd8c24c0030a94b75743fe96c0187bb36 +size 36136 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec819e9f3ed9b8cafff849087cbf8bfd673cd3fe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959e159ec5a0976cb50c936e49a996eb4123d563d2920f3d66645be2b415d119 +size 41792 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..597f411d0cdd78e65713b2daf5925f3095f2e1d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742d0efa9de0fe20c046ea89b27a6dd2c0b11915085f4b787ec259696901170d +size 60001 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..793fdfe63f23ed1035ca7b3219545cea0c34e3ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c757d34307b890a25056b623795b92356bc9aec43b9656b658298b1e0ef86f +size 28292 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bed171abfc508ef4c1ab7ec2009a1db5af2b5283 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69124f6b67e7b9f57c2a12c7135734ffd2e6119f050b7b8825c45e15c9f7f9b2 +size 14850 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9dd31631bf6d737be8f25dc59b3be9cf69a1da04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d040adcee6b8581b59011dcb819da7e190b78dff209bc33a2dd6521eaec8e672 +size 45251 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a486714c9d3d62d964ae4e4b1cb59dacab1d04b8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e344623312df567c591ac118b6c682ba219d3d1e410507115045018d897960ad +size 52864 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c979ffae645a8ef8120f141cc8f9351bfcf4eaa3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:911b3010e8077c3437de0145f8a3e14635dfbab52be186890e23d3ffad27444a +size 71026 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cea99df6c565f63d6150f36fdb8caa42ee685083 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c78b034acbcfb1c6cbbf3d95219024c4168ed3edeb5d0f4eb4687982a6020c30 +size 34916 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c24f134383995c58591fc8a7d6384ff78adf7076 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de42e2bc23b1185afd99328d8532b44c6536219b9839286c2713ea985a876573 +size 39002 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0e728ff27992b9b3a0264391ec1bbbf48f2b088 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c43a49ff0a90252c2a977a9af66bab838a9b36eb0dbb0e6d6f83bbc051d72cc4 +size 77613 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_114/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..981086a3b6406961940f2d1ffc9c4011fadf10c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283dcdfdd99bff8049f01d0c7cabb5250589161e5603574fed4c5fa5730483b6 +size 20993 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f0f16c3e92f06d6f33b46e91ba6fdc624adadc9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e71707deb1342ea7792a4a418f67ad3e14f8e1a9bece8aa90f757e62bdc4665 +size 16970 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72127b0190127ca03880dba0cf908c6b4bb8ecb8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c536827d237092098305aff7b5d6413ed745d83e1551e749778e9fa4751cf23 +size 77082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2effe5dc4c910f38aed3273c8bc08679e72b0d2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f4cef597d35ce21b1ac99b3b105a407e0f89c704755d6d5c75f61c725e3f3e8 +size 26995 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1d5be55917ddf994bb6afee2c39facbf04bb962 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761275887f6ebc7cc9372d5d5b8e36fde15be0856c9891363ae99d5ec9369fe6 +size 42208 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87e658a1eea141eaf7706e03eef676d07a4a78ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0180f96505e4cfa3c38c83c97a203ce91dc4e94c52463881b9f9ce747e10499 +size 51051 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a47fcb68cef791a5d2d5cdcaa117283eb5bd4af0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a05cca9a9448c71c95e3aa88a7c35f5c0b73ed5557cee2923c691b7832047c +size 63849 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96d3eeedf8ef6c5e9622ec955533a99e816aefc0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6556824ddf8577398a73c6459cf14b9bb4247f12153e739cbac215b1172337ff +size 79775 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..421cdac0fe3312069722d8b3f7ad9e2720d9b76b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80c8d654074a79157bde89392b4d4fd90ef4d7041e03e88ae443edf5c2c89f30 +size 37780 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4953bc9cf77ed83087273bbad10c598726e14fca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfdd2fb9ffcaa5f162de20808b1eb676b294fd41e3d53f102ec97fc6cdba73db +size 50563 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f94baf51426d0e2fb4f54fd5bde54c5c40cc539 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea825bc99b61d06d0327c31e4088cbfb3c9e55a398c70e09d520f7be68b80a63 +size 68772 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad5ecdcd21cbace186915afa7e3db272e3a37f02 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a1ac614a86078ea7bdbc25d55ce49c40f6a74c229490696788791d32745899e +size 33402 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6838eff5b42da613fdc9e50606be0b3d5bf8aead --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665f0a1f3b1e9e6b937a7e3172e3f04e141b41ae07a1f78be4d453a8aecdf324 +size 36707 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c35bb06cc49226186eff3a00c67285602c31e32a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb84a985096748ea59e23d5e1231b09e5455ae4dc3159caee8bf1a8964462ae +size 31827 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..025fc623aedd42614b576fa44a53fb8f0ff4470b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92c323f5ac865c835ff306d957ed334a803ab9236d77c74f902c24ef5263c625 +size 18790 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22f4649acdcc3aa4a9ca55df33ae426c9952bf32 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d744c1a03e595f9920811d1e55d9935b26d01ddd96aa45780ce88729ff5abbdc +size 20862 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff8a644a2d8df2fc364a12ace7ba9d3c73f9c6a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbaa47416f3ff09723b1c4acd1041bdff4c3199aa887e54d4ff72c216a882624 +size 13408 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d235996f8d520e843b6457357f74cbef78f9898b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e5ad0b79dbe9b7d31f417595420b2239165b37b419bc598741fa2a7622b045 +size 35874 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7768c656c233ea5a23ec4f023d90c086277ae341 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4aa03d1c94c452ab6e0c7a15b549f3ea4dbef0259be48e7b3c8cdf59498e1e +size 40815 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a2dd7f575ca3efb11bbdac88736b31c62edf703 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c3767a0a39d707929aac59b8ebfcc28405db2ccd9fd3ad7fd692070cc78ed6d +size 57641 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82ce1e7a612f69ac67f5748a670a788e30d4fe0d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be5b93b7e97003452b311458cf2f2cbf97cabc07761686734a5f3a55a1b9f74 +size 26585 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d39ffb01c2940fae3234a33a8b11568498561cc2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:619b2f065951caa086362cdd054f08db16b4c4d8da58d7e96469ba6f88f7f92d +size 14822 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ea227484a888399347cf97b62e419c201a1e9a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3e94b4a048cbb7fb225c168fa35b4d1478008076fca1066aeb7633dbb9a204 +size 45385 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01d6bfd1e412a43f0ab5ca7ca21b1fd3ee0d0b13 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e18c7600402be1cd41a5eff8f558ff49bd0809f72f897aa864252cb4c895e7 +size 53071 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05117dce5790a352f0f86086a02e8ba6d8a72541 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e332a974e334f3dea205320a45884872eeb81d91a44f3203757401aaa1ab54e +size 71421 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41138d4d533f3e89a6655266bfd3a335abf21e25 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80cafe57e4c8d328c1c3807a67064681c1085f97f20813a13a379534350d699 +size 34696 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c703706000ba99cf848c930ceebaecbc414649f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb3878e06d465fc7b0fcb4ca59e8e3edad7afee8a2c5c1804a0c29d99fb807b +size 39119 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3aa06fedef39632448ccb72f03eb34fa343674a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752908b37ba6ee8eeeba9d0b5c08e3a0f9198ceacb006da4d60ae5b84c702b29 +size 76395 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_117/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa7ef1dc93492dd279b730dd98a378ab09da986b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7fa91f73c789f318bc1e356ee6e1f6a42eb68fd6937a15cc51f31f355250f11 +size 20959 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e63a29c1cdca49ecae7cbd26c985abcce4ba94ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2506ef385ac3ec412da87db2704d1e5ddd60fac05a9ca079f87c7010a371f804 +size 17741 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fee1692a67eb8239ad056e7dbdc5a4df4eebabae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c7d62b36f876caec4ca2763521b11ec9576cfdcea76dfeda6f286cbaad25598 +size 74622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2dd61048c1741509c30617a449803c832bb51e65 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb11fc72332fa3e38c0f04a2dc7763da10c1808f0ef02324cda04f8b8503dde +size 27082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f92120c4ae7b4f76ff49e17a44428beec62b38b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4609999c993fd185338459b08fc2798504b26311dddc7594c087383a45a65814 +size 44864 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d06dde4285507dd16637be3a357ba74c1b487e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6deddaf17fc870380389f100cae100f50bfabf7e3060188472fa344d27bb261a +size 48925 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5a31ea78d9aec68a66fa59eddabe4c27859c99d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e41cd2cba19aa57b720deb6bec8f63fde63e8b8d08757c7f12cdb01cdcfcdb75 +size 63398 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7694eeac7cd641701edb67a7527b8fdcc5570927 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a7c659690e7b7c12e9c1d8faa9bed2f5c7f85ce66485629ea916ac40e4ddfd +size 76505 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44112a197e0209f2dc1b3a0cd91c5701e4feff1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2900c4a98c9719b459b37f1fe8f43b3dbcbd70e27786a01bc6a9d38e31e64aba +size 37200 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b03cf224da090f6de7c6bec9d7e4c216363bfdae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9917ae8d9c5992cb4a82c9ee9dae2aa56918dcbd829ab81dd4b0a072ab2dd61 +size 50023 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0427b043bc5b7a476e6c8ca3054cadd17f7c6c1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efececa74e1c9eec464381d4787f89bd28e7b9bbf2a5c97a820a4c084d36929c +size 68181 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6079990c92e578b102ce66db961fbc2c3bef6570 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d82c1d92bb13000a74e0746b51ffd30ba6e50a8fc5ce88cd8bfe503c686088 +size 33115 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3ca9a1dcf2b3e194e794fcf6232a74b949601cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1bad99ceb3c9296d73417fc903333b2b926ac3aaab0d952e7590dc375b48c6 +size 35617 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c08bef5e4c419543a31b51eae54dde167198273 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb3ce8b048b90078c53a2a83a113573067cff7d93f2c5d33fe1595c411f0aee +size 31688 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7ef1370ba5e62d9c8bafde27bd02318d633ca4f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d884e424da78830b886f011b3352b67a20c024e18baf0d04e4b75d101b448016 +size 18815 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0faca1d2e77d6cbfbdf3af7542a394dbee4e5c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f8b6af240bc18ced53460fe6be4f66c5e04230980c9e61aad0d34b0f639d1e +size 20960 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41d7f189e4d01cc467783e92aca4f2369648a2e7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83766cd9bbba41681404d8f80a4d042799b25991998f7e7dd1ef49e9bb24e4c8 +size 13070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..690e99332de312d87ee5a0863971145e35d1059f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1189fb95b31d3bd7896bf82fcd0351618914d09b8d7d3a06988fa2ab8afc7d5f +size 36395 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8533c0986213bffdb4999f367aaf23d96030341 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73df266575476373131496d8f93ea22fcb6c726967a5aa9d1f4d4c353f76684e +size 40766 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..663464decbaf6620ee9926dde595f9a17c16c3bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca88505d095a2a34b9dd80eca6f444ccbc86e9dae342d767f47f2c22493f5b0 +size 57072 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a62be19fa10834c306ea9dfc7c41f322252c440f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efbda6dff20a0d545987dd0dc73a0552557542432d8ea1188b625bb0c43b030c +size 27864 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..886500eb40dba912bee1f074896a4b998c5a0688 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d63f83e132a612d3a5940f4d45fbd6c1c3c1953553985bfce12cc3ed6ec478f6 +size 14731 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b72d269f7ded4ad7ee84e6abb5bd61a36770b6d0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3bb9bf4df46acd9ce987dbd2dd8fde866d9b917f6cb5e4e1525d49bd3e7dc50 +size 45604 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86186ecc53856f12e9e366991d4ee873d817db7c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c293cb7a069fbad7eda984bd84e2d8e8914a52cfbd557f51d5454fa8534cbecb +size 53608 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31ad08c549c86cfbcd98fcaae6beb07a1aa9096e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac89f35c6d7ab45fcb15ffdf79077d42492880ea46c284accdbd573707b2029 +size 73489 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b47a19d226458afc891af0ae4105195dac673758 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54605f59d646efb8c99e69cc11b9c36a70ca5d5837b4253207ab02a6d009e76 +size 34891 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e229bdc909eb43f6456b49c5df3fad71ae92498 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56112d9942f68092301ffcf20accec205eac3ac977d2e9635586ad46f22bb2f0 +size 39182 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c381e554705d760617a2a38785929ef4876b0880 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2b77a876d9c7f4d3f01464f496d03fb4f17fe935208502e870349e8ddb87a3 +size 76803 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_120/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5943fb45cc44a42ba78d0853dc1717dd100f703 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e09896b28f296eef570caa88e2e9c9938a7658a7e0f3c5544d8e5a1e88b0bc98 +size 20998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6540484d2c839109d565e31fb8e5bdd83453c75 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864849baf4639e7218db1fdd1a5fe9b3d4dd404fb4514db71c9d3893300048e2 +size 17261 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90fc575efb3a0c71a039fcd22be42af56962ef51 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1124652fb22e1a1f3a13c69226b077bffd2a3870bce25edef1ba309ba92940d4 +size 74418 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5eb6dcf68447856ca46671aa888b7b9d0a3b6e63 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9bab6addafaaf49ceba891a068c2586e09e92af0045b1521a83bdf724d2efc +size 26959 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8ad873b86edbdc434ede2933a50f3c440f9e9cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eebbb4e1887598fa9fd5e75bc973e790f7382f788fc6d08a11735fc2ed127fa6 +size 45346 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..064f0ab087996832c66cb720609e17b2da115d35 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dad4f49afdd071ebf634b5dcd432d1861e2f391bbe66b1fdd34b60bd1a57a76 +size 48618 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79d77800483b4d33cb81044eca7834b9a15f30db --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b54e85359ce461e20c1e04d92e0fb4480b77d7067b661e14ed595897f782ff0 +size 63335 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c405ee842d338668da3dbf74c7a74d4dabd20779 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19f63acb8488014dc8dc7178798976aa8e6378b697da0e0942383975a4dc3ee7 +size 78191 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6312952b3e13a491299f8194378bbdd604bb7d3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ed9b08f549e11d3695d1feed7be22e48dc243e0d8a683c9033a207666285e7 +size 37000 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdbfaaacea8c6d0bb47ccb1435c2ed74b84cfb9c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d228a7dd5d7db0a0fe86d4b9fe1803cc3b878da53cd546849b72bb615fb3cd9f +size 49257 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4c066b96551d842aa18673e69ce3dac9f716208 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808f8df1535b6fc961aeb921ba440e57898cc538422d17daf93f046d2fc5c912 +size 69359 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56089293ab684dd674ae2145b0ae3a4e3f7306d1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a7ac3f298db50a20c0b524dc7aa920e88cfef26b0a27bd5cc60ab807346bf4 +size 33019 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f9c8fdb2931501e57d76f42f7ccfb5b1ddeb864 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d2bd56da25b891c1063fec19825693c1dde5246f98b350f273f18ae6d17f2a7 +size 35732 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40f3b76beb9ccbcc1ebd506476b147ac4d0a6c1a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653d2f149f7878b94c9c6016cb98b118a326da180d1a0f9c5d7a1e238e133cef +size 32965 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd7223798e7ac0a819175ccbea1050c2ea9149de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e60a5e2f61b2569b1e4ab366ff597884ad437665d6f64d63cb1a58eb1c74ddf2 +size 18616 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..604b3bd0cdaefff9830e138814e911a42c036011 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85faf69949b422b2258ecebb4bfa447059bc2b590b1cc22ca5e17adbe9d7ace +size 20999 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb5de21caa9d2eb81aa9ee477669576df1f1f10a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa3cc6314293cb6f80bcee9ca84c4dbb88b489e66a535df521eedbfd6d62369b +size 13137 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46cb9db9ca6e0a713cc011569ef76fb7a3c036e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a08204e6f9b6825ba0ca6ebe72f19ddfa617f56294cf92ce400f3c37359ff9 +size 36659 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a40d78a1561f70891479530dc0fec1fea3776283 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b111a082ea534a6f89f27c719dbffc54364afad120a4bbc503869d1af27864 +size 41374 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4bee336424635bd74707ef538416a852e8d8033 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37caa48ba7d5a40ec545018f7201b3445e287f18f0d9db66e2f6961439cfc8e5 +size 58299 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ad54715e471b3efc7262cc703b07d790ec472a9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff2d071dca2cf6ed098c80191d5969b21f74641d44c7933246603a4e2c90601a +size 27683 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..634347d78bd0d9e0e3a98e0f6fd41d7978dec5f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42b9ea4d1adefc363b937e390b21951ca9fd4096d38baeff6113beb1506771d4 +size 14631 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97ca1dbfec1071bdd43a496f86b996e42cc1aea8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3784fd3571dc78055fd16f2672be5d56523f777a903788b58d30048b3a0e34fd +size 45276 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85250c4f577c679b393eae30810883b8fd6be4ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e6b5ef466ca4425331bbf4ae9a71c71ddd50214f86e63e9c998de03ea0bd0f +size 54011 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a3e4e7ec329e3da9d8f07b4ad14b64852ed942c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6a2e0612cf5ae70333fe4168872c9914d2ea8b79e515be9c58c8eaee933ad1 +size 73036 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99b17474eb988d5db6151f8fa492db55fa4720c1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7e07c2803b3d8b8afd3fb9ad40b7e5ac846bacbf0791300a8de49abffdc7367 +size 34856 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d89214efd0e2c1f87cca21dc77802bad3db4f14 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa7f9f1f58dca9edd0a3cd7407518f5f54449625465fc28269e302916918e53 +size 39311 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37d03e4527e6b39096fa7c3480ee3c7ae3917e3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6159b9d9bfd81810aa1c38ddee59a82ace3b788f7b548e5b12bd62c6cae251de +size 77812 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_123/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..207bb358e02608327c8a2365adb72247d002301c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab980ca0499dc4c8bda32edfefb1df28e458ce85055ffdd1c1928da1c4e753a +size 20997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..748b1a112da3ec56b847b1bd5e46ea8536f0babe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a484aee82963492858d3935e6593f4473eb25e89d72984c7346d313a56ed677c +size 18433 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..732114bc140b1bb98b8b0fa630d75868fad36f9b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b93ecb0070b3b18b789655b21fd9038f4db293368293d928490dbb4c0335a05 +size 74437 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fd374d90ddafa5b5e4a3c49600b70b6153e71a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d515e7df5e8bca652a4d24d89e817dcfa8470eaadf0d74e15d51ade11fc253f +size 27265 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a27e05f7ee7bb5971e5d68553b9e668e48610b4f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ecf12193a941929c7d9f44383a4b50d248dcd6a396feb917e5774d08f221c2f +size 44729 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdbe86cbebe278e4bdc7dab60fb116cfac22d58b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa51175528a38fbe7f82a98c91a19cb922bac81985dc86711ec690c5383b04f +size 51551 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8a3e350675a96a01ccf91b2105724919b2d4e29 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e03e51263090d8f948ca9b566667719315176be6032b9ea1323c019afddc1111 +size 63210 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09898b802099033ab98458a25840695ad4a8c659 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b721e7c660863e4c8b004e801f90777e5b18fc901f113fab80b03ee621d79bd +size 79121 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0afd7fb004bcf8d128316acfc4ffe1ace012341f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7894c2101198ac3b34cf1baa0cfd6c853ecdf2104c3061d742be19adce2f7630 +size 38009 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..694dc5abdc2ba8ea7da7d7dd6801e1e87b7f72da --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c64f57ba6af1ab5229662e67896c71b9d8fd010c463606e787951d48f428cd +size 50239 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70c687184f237a6212dec6a320305f2337a5e990 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04c1eb066183e85dc47b61d831fb54af56cc50069a2435cf8e7932a348224836 +size 67681 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36ff8185987eadb881092f8d513f324c9580ee2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955435244e7c4b514b43b7899358fac805399c990b23f2c3dce3409900fcbd3f +size 33250 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4ab9aef57a6c6f80ee250548b58a66c3a801d6b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afb04c644cf6c6d6efb01ec5ff34ac2e535a133180d95c47ec4628843a708aaa +size 36091 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d25670369ae9b1b9957baa8b067401ae5a06f7b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28b213fe2235afa932a255293c4a4130ed1eb48581f06af16ae27d4c699f080 +size 33778 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff92a0f274c0b62c7f3eea05c67d775a2bbd0949 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41bc72997d294974bf701e71fda2982c9db687b25429d1873f39c2ca06dbf4df +size 18526 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5fd24b30ad83cf5b35b1c1452691e2e498c307a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa0ba0ec3996011a6a51e652dd580ea93a294ffe7c8ecb4ca83c8ddb8b11a98 +size 21050 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7350a945dd796cce85f31e245756439ac5926e30 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4889536898b9eb13939292f17802f9b63f8c238336535c82dbdb701a1cdb3b3d +size 13506 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08ffa3f85c13140a23a8d65cc35a9bb7a0f8c1b0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbcda2a9548df18ba86aa33f369404a41523dba60d0ad8c920e063ce76407e63 +size 35403 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39982408df1b01dba25ef498b779a503c5227616 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4672d7526c8c9a360f2a1a7d9880b1d9f8c9945ec52f3109e45ec2428a49cd64 +size 42225 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb03924625e90ee953f629a6f6af51091bc98d13 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ec26fff426238907acedc8e7a20529c54bacee19413325a64d3864e56bfa4be +size 58518 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f709fdb8ea36429eb168e70208459a4173b3832 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:838a6ab37d66ed6dca133a41cd7d7c4c6cfde0e801a91acb0362d9785f486a20 +size 25674 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff01ae092afa8e9c4b8dd3ef46c3cba7b6acf858 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df123eaf0cf7f3c1c4b058a84fc494e3194ebdab43716f49ba0e1d60c4efeab +size 14902 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66e5b1f225ecf2cfb26c35439cbe5c34f9f02bf8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6f57d2b7cc994feaebf1e7edaaca605928cefe25bd2227e8afe23fbf5de998 +size 45157 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb3fb3a8bb0bde5d84935c5e668463adff4f6b33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7931cceb694ad67f93301de4ee84454182974946337cb9e25ac2b4d980545c +size 53480 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53fdd7df4e23821f66f2a4a6851c497c0c87aa5a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5579d29c8d899557a44348f85f71605fb160581abee083c7ad625bb4c3c03734 +size 73478 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5530a8ed1e961e903f09fc5c435cf5f24f1d333 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48547428247fe56b0d883702968c44668c6a7299583f2aedaed3b0f1ac6906b7 +size 34741 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8880b67cb266bcd62b17bcfd4507966420a280b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21cd613a1c7c6e1cb3bb5f313d70f4b96e739facbd9755d4b9784a5dec313565 +size 39202 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d2b92d7ba527a72156edf6f379c7f67d70edb02 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92844b722431ef491d168293b4e3129ea63b6ef41d46eb72c4a002040dd60c5 +size 77114 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_126/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccc7adeeddc45fefb19b36bae1bfe9d535618e59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dc7e64c4a41039e5fa194927c9faa2be32f1faaeac50b81211daf93a8dd7bd3 +size 20971 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e3d5b4483ddf4a621d78215a250c14f50f5a602 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ebef02d61977afb356d24275f4f8843deea6c2521f70cb0ead8cccb36a13cd2 +size 18910 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4a2ad90dd5888b787001879bc4bc935ecc1f05f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312a0feda07260afbc5661ca1c2d88ad0accf5b349487ab261a021a1a240b5c1 +size 75647 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fe9728822d4e298396a4b9bdef97a6e7250f418 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5299a44b1a2cb223b3bfb4ed15cf4daa284b82960a9b22ea2cf75d42bfe0b518 +size 27284 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f1801775de8031add9a4fab96e77ed55e210c1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ed4171d6183ad1ccc55da602a9cd1d25b02fd5bdb6d3cf02b773d86656c4ee +size 45232 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd152141c6c327151ff180cea63dfc71e01ec01e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ea06b24bc429a9521514e4aa823a422c377bb291264f2c59d216aeb7896cc3 +size 54848 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbd351842e641a0f99ac6d53f95d1ec127e3b8b5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5dc674fc297321f57b345aa7906ac7b02b53c86b2199a99e2f8c3363d9b7ad7 +size 63914 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36c85f64ee29aa40c9431db78fb3febcf5378d4b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31fc64b984d8b3cdf82b19875fc7e915a7140732c0a42e706152ddf9ceb1018c +size 81857 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de6fb180830ef36bef56f20c14d285216e8eab41 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9f551be5cd9a0d1bcc38e1d95eca81ed7b4449cfa2543518c801424d467cdaa +size 36915 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b3e54a025a472a23213a31af7d3fe95f105a76d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45900c3d5ee60f0f646829abdf75232cf318da42bdf4da4a2ca3be2136dfff57 +size 45728 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a2a4973c3f319a0097eaa2ba50f6d7b4ca2ebe2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa69f3c43de0ca9e036bf455ef68f21c092392c8ee541790ac91dd3ce8975b7 +size 62984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20973a12a9a1276910221978135c937c8c20863e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ab2b5f79b4f67f8f0883c04acd42f88db71fb9369071084f0c135baa08dee4 +size 33345 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d711bd75e2d8ec1cad14e97bc8f079479f3323e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd28e3452ed164f3add88695e6ac66d4e488665ee5bf463db326bb953926e58 +size 36699 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccf38f76c29a02cc92fa0458bcb642b8f9494ab3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baada84138da5d7088a526e694d26a1804e2806d5d3cd47620f1175c861b6b2b +size 33084 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0485d30bb6fb1732594f19824bf40d7a91f39b57 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912a880edeb6ebd631b111d63b185e4b0f2e96d4c4c7fa83e4333500571bed0b +size 18813 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f35ae4cbc17805d091ae753b7bf9db55d63b8fbc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f39ef42e37753c65b0aeffd19d19a3b76b46bc82dc50848d72bc05449e2d5a2 +size 20881 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..278d1c5eaacce7f5a6c200e455a9b3ca8d51e18b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95ea3521fbc33ce2b142d2c983154b38f3d39079881e1be9b10a48e4c47d9a2a +size 13212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4b17ade4bce37d23a9bb1734987cde3ec5fe4da --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae982ef5a876e59e22260b8ce86e9e5cae66f2a7fc74035f94e9906543d8120c +size 34895 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c283e375faf150a2d8437c4bd2c6f85106bddd66 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608ae98701154347ac6f8f2ee9a324dcd7e6c8baee21f96fed71aab205175da0 +size 41905 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33a75474071539637e3698c3e78d3653fa51fcd8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffa073320280e111a54db33dafba83790d97555a804a32a8f8d5dc2a0275d34 +size 57563 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57425f785b5c57af8347059dd150fab8a8e0534c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e741956a6b4ccccfb56746f3b4b2f4a047438f1a60b53861b69b83f884bc1a00 +size 27983 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4861fec72493f084d9d87ff5bba2cbe66588da7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e438a63d3981062bb4a23ec729143dee7425e8ab4274d0551424116b3cc789ed +size 14739 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0cf5fd15779f9bc60c8447a72714f9af3653a64 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c283bdb75324ea547c826decbc1516925f5bb515b479d960c6f1a590bfaaf5bb +size 45173 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..daac485dd25b7baaa83dff71794db18ffb5067ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80b9ca99f57a8204a62d5113a016539ab67d722fe03cac4056fde5dd28d3d40e +size 53198 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc8b2bcca1739907b64e78b6fe5b4a4d244b1cd9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e094fc7a1c457d67fe4c0ec0266fe44ec2903a2ec656310ee1cf024e44f7986 +size 73011 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b0dfc76a83d1a8db772b66b09c6ec0d0d395989 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4af3ea4582743a5d62098f1fdc27bdeabf9b0dffa4a166a9c5751fe0ee8e44 +size 34678 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74247ac0c2faa9c6d82945952a31b9f35d8303dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad78177ed69e17091575def6846a89953f3470e4faf899f1e6279a487f01b07 +size 39269 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..448148ef20c050af976694dab221dabd359e6a31 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d34c74130a56991e946a39bd705df41ace1b5e8b26cd26edde25c198eec67fd +size 75769 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_129/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ebcf7ac912101cbbf00fde3ec349f8f5936429c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54d6ce12dc1ce6819711f55134ac514ffaca6eee05d576665a7e3d12ca3c62b +size 20983 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9dbee12fa7bf9fc89e51a80c2821a20b64e4205 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9cc470ac6bcbf74fada0ca1ec06c22fdeb7d715c79e31eac35378b3d8a4afa +size 19624 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21a850b6b301a5de0b6c6f7e41d9125b06e72002 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3babc370638853ed7b21935f5cccb41350b0b9c60bcb47d0d85444213066b61d +size 75958 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..238ef849ec7fafe5e1e23ef1f75ccaa315159b3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a108bfc7ac1005739051fb09be3afdaa9ffaa8cb49931c7a9502579bb90ff7b +size 27335 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f0b83b33dff315c9a0887a4b32537f70f35a04a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b377400fcd50a7ec40f8fad172cd59d0ecabf38a901764d604393d45322b0fac +size 41957 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88a4a3d14c7c757dbd3add27e847d982eea60cb3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2a4f87c973ea9a114b7e9242ab80460132eb82d877c844d134b439572ca1d4 +size 44677 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..807862eb45d26cf24b8df8eef93999657fd9d024 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d58d6f6d8850ab4ed46f80b58520f6a9be9357dd276f66e2f5c233f541654f +size 63693 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3dfe05f8707d48f2e0a98f7d0762a12c38ff8c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529b4f62679a69c2a830588c321a988a70aae0cbab0ce14622ec45993846ffa3 +size 76458 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..294fb0cd17c88cccfbfa1f923dc6428a0ba11a29 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa9d9d0a19f746b8b6bca22ae3481a6c9fbcef558e2f2e9a35c367147c27952 +size 37298 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ac34f70c2fe0b2532ddb8651557fe3ca739be1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30637de306b7d81920016cd55840206e832e75ad5c4eedea212f0781fd49ec0f +size 49521 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3aa6ef694d005887bec068a3b8c00e262c4248ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c982fe851fd351fe233669d049f03f5b70466779a2d1e8b4dc24aa1bbe2e49 +size 68678 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18f41b9989e48f7d163309d090c56bc7076b3407 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1fb004fda5c970fa02f768310d780a1d9d22e6b9f88772898e99d17d9dd493b +size 33127 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cacdb36648b2a5ecdd82d5e31c576ee111023fd3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13bed8ef5d6308b693db45b99b2805f0f410286938ec4c656a1600d569655c99 +size 36487 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..715c8c7a5581e943e8e2fb428e62f583a320484e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07083afccef9cbd370fd1301bca1677bd87a572566be275e000f1e5e88ea3d7 +size 32912 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41c0286bf1f867aa35ce3909c6c2daa46306e11c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88153bc8a08a1c91604576c681e922bfaa17d7b5c9251b10d7fc999dcdcaa799 +size 19006 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..257737e7153cbaadf97fa86eefe45bca24e61058 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d34fc2ab0021bb8576a4f5929792923c2d5b54e65f1d0b8be24f7485fe2f2865 +size 20936 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3c2cbb6b59aff3eb19fc70530a546ee81955c7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e7916656109a25b2d30befbe65a72896fdaee02934af375ad810dbbbe36a29 +size 13204 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de9b35ed0fd2f4e3b939928340a2a6cc2daadd8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90379875bef0ab6cfe8219411bb8170aaa0b2ac27b2e6559d581d845e81230a0 +size 37806 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..121c696acf8f85cddcb235c9b8a527f4367a803c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7085f8fbd27eada4110f132bf7b02fb1da7b32a5af87818a8cd9facff2cd073a +size 42463 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77e6917e8bdc2a3019e43faf794b86db6d0fe082 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a69dbccdbc3fece27064c85d5ee735ff6adb7377086c338e0a456095edd7628 +size 56619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3c95d5c2df01f83abce59c2847018b28e03e149 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9673b17fae2aeb56a8583c0e9eb68df52866ae77141a10bfeaf3d1afea9dab82 +size 27311 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a824e02a55517025f029b0aceb52bebfa8148e43 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cad3d3744f8e51c87e456c0362ae040ef2aaed5f817b03b45eb3f4345034df5 +size 14916 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4ad5c084c8adc459b7a80d5ddf317221fc18515 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20f190877c78448849f8165e06b4bf9d8c3fe880b88c5ca7b7e1a0e4d2da4085 +size 45334 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94ad5381e30237b7c19e68c4e221ebdda7a97601 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deedcd9609209c5618a349414587d54200bedb6e29b2a7042220a71b653a4959 +size 53922 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a094f79e844c070339dadee529525b4c6cfddbd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29542afe2330eecc869162e792128fa07172fbf4bb7cab7ceed86d895d1f1b39 +size 74395 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ebbba74171f2f8623a9618a9a28a7efa45ed36b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b626959456bdd0e9a1933033ff47c47ce0d7a4238c6dbd641b0b13e6109945 +size 34837 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03202ced6d6bceb7b3d0d64bef2b50961807d42a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ef1c22c622f814d97b5e391d6f5911cdb48ab4569c7883a8d4a433971c44b1 +size 39171 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2e74375d709476dc59b7cf8e918bc6663f9d68b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a6e3bccb123664a393487d00512ab82313206314d2e51065ef9c226a9134d9 +size 78349 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_132/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9baa00c553fb49eabc6ab1c464f6262d343bcec3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30dd6b8dc54c51bdc2396b15ae11db2034615f6897d95e5d9630b0db70d4d900 +size 20994 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..105e713e2a2fb18535434a2e7d50b836367ed7d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c7caca01d1c5d566061599fdeef6e11efd03f181eb0cda7615cd1a6ab2ad15 +size 17936 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5efa7b744008301b8474397f4db07df574856cdb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128041876300eca36741f9a1e673e4077bb45511955ce156864a5840184dee0c +size 75153 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1eb0aad4a29c085f7dad144876ec974940ce7dd9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087ba5d28dcbfc2dd8404b417e8b51ff7fb0f5a42930572282fbfa49e3794cdc +size 27311 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23360c6b770f3619aae1c1c29735f5c12354d9bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1499005991772c0b762de14dd73db5c08f408a5f0d0f73b6cc7fb4335c880c69 +size 45622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d02087419e522adbe6e97ac6d9685e6e345fc6d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd8356e86d55e09bb5288f80c518b27f8674400be258041b7d501b6d6849aaa +size 52695 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e0dbd4fd9316b3b003de3fdf4c523250aaf0691 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c9a174b7a745d950dd2bf253f168d1ddaac17559fd70165f2b082a5d7d98ccf +size 63930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..160890ffb6fd342841e8f9c201799de4ab884f08 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc1dc4189be9ec77115faa15cb8b92eec911351606192e3c4cc0f8e1ba495072 +size 83110 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c347636d1174b30ce728edd4c965d9a471fe261 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505ba4e1ed3fd3c1c2ead6c22a271bdc551084c8ba1c7daaf80395fad61b97b5 +size 37273 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..398c544dd000afc4dfd17907971f5f0f0998e06c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ddb92c5b1197faa4c95543f602eb21180e914d822d044bc9054420c3aa3ebee +size 49238 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d02e62a07a17fc077af0df2758e8f75bbe62ac6a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5e5f29b4aa553904d59eef480c48b53272ba86c23c674d9e13969492d92f594 +size 67494 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac6e575eef67aaf6bfed3847aef83e03ac560e6d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9962d50f807f1263a81194ee620d2edc5caf6d13c7e0bfd36e80b8ca5ec3418c +size 32941 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6542d4b9a54275dbb0060b389cdd6043e4061686 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf2a7dd995e7cd4bc51d5ee803824b61a2b8012fe4a00279eb6ff660315bc70 +size 36237 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f131822c5713e9b567723db4874c5038ede35146 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d51f776c92624af2017ace1456b262a2bd277d9334eac6d551dfcb8d922269e +size 33164 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a930dc7effd351340740b4c6e25a1c2cc4e1be79 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4944c68c123f7c8e823eb477227deaba59e3b3049a72d19890d6984581f3a6cd +size 18697 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2728946c7c82cff0310f58ae118f6aaaa49d35d1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d59a4592bdecd633927035a2cb4606a601c690ca4b98ce88bded1ceb043b24 +size 20951 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37b4cc1951aa7d3feac81249e4c80192685f07b2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6abb159643932d6de6658cc92a1b2605b11226e8417da374c25e6b5c6d5bb1ef +size 13461 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ef46632242586f7238c4bf213f24ab278a31461 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baee6d298bcf6be061701cb624582c7fb3d6ae85eacfb81d2e21d7b688535f1b +size 35352 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76ff6c464877edaca16a043c59b632f24d8ca798 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a3e02882533d9d400211fdaf0bab8be50759ad96a77e8bba232f7fe190b1ad +size 41614 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbde9569a371a90559c8e275780f8c063cf0963c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e6cfd87b1b5d31c21d7edc6e826e5315e08c4879621fe0a26b49c0728e098c6 +size 58666 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc85dd6f23eb6db01b5324f0522f96070af852ce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9edc351d2a48589c1a730d6b5390295b957512d8b685cf0f6f5b438892b6b745 +size 27892 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e3bbdfc99eeb6e7ba299b8c189c0232f9b599b2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca6066d11156ccc09b256677434e2d07a74e78ff470d0dcf921f69f027cc461 +size 14744 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfae9b97fd721045056713d1daa39f8e8c642250 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3af78822708cc2a5f663b1e1ea8d2873cb0be0208f8f7de5b5014d5951fd801 +size 45441 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f718e1877a13a8975d6c87a42e8edf761fed996f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dbe992b9ed779de90cd68a4ee9bdfd7705c4cd1cbf686f726454906f489e2f5 +size 53509 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..798447728b9a9dec0a9ee9948c0fb538fd59b621 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8bfbbc5bc04e4b2b2e636e6e1341218e3ea21969b5391db56287fdb7496a937 +size 73568 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c3a17f09a48682e8eb8939f75173bcaee15aebb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:477a58fd18ae02a2914e4a3d429f4e9438b0bbd524281ffad25301b02020a8b5 +size 34796 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..146196794ed74c8b78fef188afdbda7bfb819cf2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b537a15735b0af26110d716a42ae1b3d0b79834cfa74e9004c8bdfca0601061d +size 39017 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2731a65b85236d7d4c77084a71adab035beb01a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e49bcf83e5d7f16c88f4bb06dc3045de9e0028db002f6d6f7fdd7e28f97e79 +size 77661 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_135/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..802df69fe58bb2b6ceb622d76e763f9582ec8283 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e9d4f252222ed0b0139fbb02dd0e483cce5734a215d04558f0329e114bfa13 +size 21012 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea92e8f8a1ee6ffa2add413d26c9c948ecb6a3a6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2795e25927e49fb2f5a649ca18aaccfd3bcbffdddaa603e762cdec273f866b68 +size 18050 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55945a49bf1066a4c03283f73ece0d0f04066941 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73f818cbda4ed368dccb59949a5163a2777ca14d0b4c010284d76f2196ba86af +size 74534 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07d9fbe78342aa450b4b2c3b0cbfef214881940f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ace50bafdd72204745d306a44b4042b6f82029894d2af926f923de4715938e7a +size 27583 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea76409c993753413a2ced671ce0dab48d60da7e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1eaebb79405d517da06d326915053c193bfd807f9f8dbd3e973496477c38b0a +size 45598 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbba3a04f3e485a5705496832c438fd4575eecf2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261e7a69e4f851a1ca6a51571f73ebe7c56bdda3282e8c937a0dc0337c8903de +size 49363 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1115cb263addd7f5129f3f2567b11018c661d96 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35cc9cdf9c8a2085d8a2914fac4ef4c56bf9e9574f5a79d856df9be023cc8bf1 +size 63576 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15b2086cec1b33a0c36b4d689345de65d74acb3a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee8c6624eca016acfdce22cbd0a2973642abb165688cc7635901a9819eefa388 +size 79454 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de94a5c374427169245f2efaf9098f9fcd69ce82 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19008187225cfd8cd253ad6c806eea698042e960bc5a5682befc611eaf77ed45 +size 37082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0c708cd1f5fd62f0ab6b2ab2d69760855182740 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07650721652148e3a554ace2b9921dbf24ce7f0b57c1a3032a9e2ec603e14800 +size 47857 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6c7fd225843fedee5aefdc2dc0480cb11f7f7d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91fa1be6ff817f094404395a88b1b992a57b79a80bef198595d686d32976173 +size 67678 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c227b29b3fe930cf54abddbb9177ff768c471687 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6027aabd372016fc781171dfbde8dfebdb359d8b6fb61502dc366386412f5af9 +size 33207 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24988491079198a1686e6c5a536db489bf73392c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b67400e43bb1957bba3864e2e98134323c7890100320666d2dc82531084795c +size 36164 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98ee87256b4030b1238a88cc4561fb52b317d205 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3226e567369da58ef1f10d046c120dfa56d1f93877125fb6e557cb7d2bde3dd8 +size 32976 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..968e7cc90b3cb9a617b6b3549863cfacaa4b45d8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36d676abe56a8a5edc3a181041a3ebea30d17978bf01617de521a3b0a961317 +size 18800 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd683ee17b4cbb7aa1956b5b32d2e00217433b2b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93f50ea3fd9918789e6f44a769d25c148305d55af71de3580f78069c4fb520f +size 20981 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08b829e7609da2183b82344b01a3efbc5897d70f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0345b8d2efc60b43d1b2616189244611d7e249384a45fd45aeaadd83a1a32758 +size 13421 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58b27d42be7624ef7b7423ed1803f71de944155d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0108e9717bab28bbb39a519860fc886b70c6743983ab94b8a8655191f6211970 +size 35056 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf6cc8929daaa32c6ddbd2997c5b1645bfea2a79 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0cb72d2ae31f06db820132d77019b9407529cf1385fceb4f1da759d36292b8 +size 43171 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2c602f4864296beb8614dd0fa6078864f86abf7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a58bcdb968d491d6593c42d65bea8ffdca2fa93a1ee4ef773dbf12c29f0fcb3c +size 58434 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7eeaf6632033008bb09c5bd579d8f5563f977bf4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc28e285216509d0639a2ac5bff9ad594aa7d83f067ab8d6d48a9b91289f04a +size 28538 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7751a9ca4d5ec36da185eca8888ae467ec560387 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc214ae9d1624b1eed3fa2de6244089358b06f7ae98514b1085a6a1a6cb0712c +size 14611 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54a6e33f20abaa797568e3b3866ee1a2fb047887 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceac22e35e3a4bf9835f3d8ed1c0629835fb0e5ca8f1af55ef8b096ff5a28b23 +size 45777 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..386869d6d74db43004d63e8001013af2632cb375 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e650d2d53764958c229d737753fe3f84b94b73313a41779445d29212f058e7e +size 53712 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04c4679e3d062ce1ff157ade4c0166279943c578 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b86d564619dcbc25b836e807af497186887351f696089741c13e8da150be0cc3 +size 73794 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e18307555a37822dc314556ea813d8808bede35 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5625097ad43bdb99435b1ca8e35b63f8e66ef0d63784bc99a5e4c4b5e40cff +size 34752 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea33dfb16ab9539269fffb4ae965828ce0bf97e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae938942ffed3ea6317cbbf4e6c2aac1d87f43341349252464a49c10a0f02087 +size 39101 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f4acd977fa5272a8096a5718aceb3126fe8416a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0bf4a623f0ad6a9ff06d4a97e28a8ad3ace2f6ded77a2a33e0e55769446f1a3 +size 77583 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_138/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8e6c4e503fea82f72df4393ea18bd4a45fb2790 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c909e0424c420789a66a01f735ca79e11698aa36e879b5995767cdf7c3c8ccc5 +size 20968 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e339764770874e8d3bb95b3daf13476bbd5fc90 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a574cc2eff11ef1dc8fd313b61b3ea613b460c9e42eaf95fede58adf4d02befc +size 17547 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa3d5481435cb2bc2c36415efd3ea252ea0cbbd7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2276cc91c083faacbf4e6a40aa3c158d7972da7418f621f43fea40064f1aefef +size 76223 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e9733455a3408848809a8b53ea81900f12d2d13 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317b668adb023915c33bfd6d4a8ad406d1d4d3c3fab9184c05f15a08d8d82244 +size 27122 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31f4f00b8f105fd42e1a567a41352d17e21b5e54 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395ca4df1fac1a19caba865901dcf48ff1c3335fed775d12a398b49c4641e4f8 +size 45070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b937609baec4efacf276730a4061499844bba5b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1effffa60ab0db36678a92a0e7f2d035c3c8f2fac57a836cb7c046edaffb9723 +size 47212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a38f18be95e7bd24994901fa0a34a7da6c3ef7cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0439d21d4781b0836c497e2f86d75c705145fb42cbe78d3d88b61f79bcaad056 +size 63408 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..856b0937ee431d5a73b782e9621acbdfb3a9e186 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97303bb767ee63ee1a3ee90a0f7ed9e59c2984ea52cb9ff0ef8eb99a1d679407 +size 78267 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de3c5e911177ff383b06ae638be30afe4e22e04c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f62b7f8edfc1a491195df2e7d10ffa06e3e90fea6fc3beb9ef12780e8eff95 +size 36532 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aadb2d030c10ebd2244598b40bbe92afa2577fe2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0ea9ec9feb840ae164149245a384d429f6fa91015c518c7024196daeab039e +size 49689 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35ce1601fba47e5b87970825ffdd8591a2981439 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed49c8208cec7aa1963e9f58f507d01e18c88ee3109cb5d49392b4c2be9b9612 +size 70570 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd217b2e615e37931511a75535505bc6d1a53306 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b7170af233ff72dbe4e5570210135fb414b2d33696b0a2d97c967c1f95b9f1 +size 33221 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..604a2f8dcc99c963012496631561cdfa3c3c2cee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49bd5eb4cba8fd65a43a9f6bacd47bd0e079b0834a4316bb943cd7077941972d +size 36128 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90708b7d2276709990adc9cba7c0e5a7efff3765 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9948a10f76714b2c6670f2525b331c356140a3d1222811a5ac656343f802e6f8 +size 32175 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d8ba745825fdffa09a428fd3e5745a6ddbe315f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:108c82378991993947f7cebb5026f804785e28ac0ce28195bba06b96a0abdf2e +size 18912 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc1f101d260be45c36eda9adad0c82bc63079731 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5bc0792858f7c525c04ef0e2548c215f576de017c41676fd3f79a17ce430ecd +size 20883 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c08fada0d0192ce74eb48faf27b7905d9ea4127a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aff3381f6815fab20f0cc204ab1bdb2ab35309eb2dee14a3c04ed3bde3426ad +size 13706 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29573b3d044f01a6c7abbb99f2603f660bfb1b2f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98982e36d62e9caafe535306defec5ca62e56bdfafb7e074074ffc327ff8d79 +size 33881 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..625b58dcb1ecd1fc6d10f6a8fd5fcb2cd104b00b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f527d99a8eba365a2a9b682eaf2de5aef64d26472843d0b0988974d4c7141991 +size 42457 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35c0118bb4b9d4e8007b704c7618b190476af22a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b829578591742abec9c7906243fa9dac1ad8616d5ced821672cbbec077d7f77e +size 58972 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23833852da2b5274ba1839741e1a973f17f1ea65 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb74c8225ad18b9da393213d02a598be04604d4c2f145a4688a1a5d0c338980 +size 27929 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8173493a254f57d3d00c0297e98da9e87c492fc9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fcd0b4d14211122cff4b9fa9879d54fa21edd7a570bafbecbbef85f08364b80 +size 14627 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d67fe5cce8345076cd933e341a72ae19ac57575 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c4628eb3f31588548b777e51e3b08c1fb6fcbab2e9b43e568f8ba9b8dd84ac +size 45485 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19086ebd3bd0f3402dc0e006ac4262d0f092726c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43cca478593e5e6c82e5867b35c4c4dfb1039478fb58a97abef7cd66e82b2a1 +size 53957 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40b606862a2af378d0a36ef318a0df5aeb32ee6f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b2a4eda374e9ad0f53c5e6d22cb2ee3588ea7dad4c686b3256a762275ba4cd +size 74209 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d347a9b2d57657175cc2763dc0fe640efedd546a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b12b7c16558adabf26b48eaca54504398429dafe8d77a4c5eaddf9993a9209c +size 34812 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8043c765ce44d1e2510e58598b7204b5844ab607 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22d271305752cd4249a9b71957186c03a98cc193a07b8533eb0215bd500545d3 +size 39305 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98216552ba35e601f31b00e3232bb1bc36fd6672 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3513332e2fe503d483d723c551d7a9ecfb57f68b794174420d3cb1ad0cd7c634 +size 76700 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_141/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..344227b24cab60fbfb6783f22ac8681a08b8a5b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef79564f93274cec9abef91794be882af8ff4e4e1ffbae7ef323cd235a3ab26d +size 20979 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20b8190dde649f3daec3a4e49f1f4e642cb215c0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a207f7a54a269fdd43d3311e2103e63fe0ad9f6f300fa0829a25d0878cc89fa8 +size 16494 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90115ded73c9531437835d681535f9c3e552570f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8810168269173152debc094afb70006d6ccb9f3cd4398a8d8b681ecd251e4f5e +size 75110 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc1f831f0b98e5de50aad4a0b13dc9371f957a7b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00d91a318051e2f858d5ac3a6ec292d1d70b18dd6af04f0bd8191b8729196bc9 +size 26705 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35fd7bdedba45ce51f42d0059bca77d94420b61f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54d2b679d068b1e779fbdc341405a650853ff80007b14f626065f05753ca8295 +size 43472 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6844095db7f7ebea49653c3a514faef297be30e6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed17311652b72b7ab5f6571679db8248aa707e637678a61f513cfcd2ebd2847c +size 45195 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de040f1477a1e1b39319dc8e0a407ba4230b5c8e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76b7fcbe5c1e6576f17cd898f68458bcfc0e071d9a677dcec0af935c92f42daf +size 63185 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1a913344f722b4117eb5b7659ded4699ddf1bb9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bde2d5c4006dd766348c9eb87ced90f0f9f2894c7f7ccb2a6d715851e5dbbf9 +size 80115 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ef0fc0009d4bf1b125e21ca2f880502b0bf35a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067975667087272669b809f01f871b29c198e6a932ad63c5c8f5c83bd9f4d5a0 +size 36979 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0dd2697289fa71c4f6b9db5c5933f053e31e3f7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695eb59092a50e6fb54d4a611aebaa4463a590c92cb27f1a2f8a0d23c87edca0 +size 48229 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebaeaeeeb70ef5c994c7d7d5cc5e79b6679623c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71a955a0f82ddf91683ddef68e7d33f909e40207bdea333701fa97ae6ce51f9 +size 68080 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29d41b325a10c70f39336b0c5efed832c8a9b0a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c232f2ecf494c16d0fa8d004962a78fe7901d88ce2617ade7a46d60fb5451202 +size 33262 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..067fe2a6e0e62baf94c0f0b5fd4aaf0675812745 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb2e0a7312082884b4bf383bf64ce94ffc02a8af3200c4e84d2b2db978de8d3 +size 35395 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1356bf49660fb70539dfb6981c5ab172a2ed640 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2eedd54317883745f4625fdf05b3448b911df0e7087840f2d683d589ef0658 +size 30743 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95bb221550bc28604fb0a78bfd43ae18d973ec7b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9ddaf2063130b48a2bcb23464e0f64c15ffb25d1d2deaf12c23d47ba8714b4c +size 18673 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e522b5489fe74aa512392dc6024b129b8bb13cc0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:161dcf45c94cb09aa5c3b6f3885ce55f2059cae85e73bfb6812c9133435a1778 +size 20852 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0cb961b0c4f838d3bb3389aaedbc26df6de28ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd7e87975c8211d5d2c4ad2dc4be37c5850df6e38fc4e856e4d918bb5e0af1b +size 13165 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..beae86f6f76ce31125f4e3cf1a3f61ec89280666 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7979f94578ad2adc836b8d0cc92643897022bacec4c44f2b68898741922e7d1 +size 33751 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69ef2effeea44590c050be0b4e2a7131447864ce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:485600bd1fe20f80ccd324ab7123725298767186fcf5450c4124dbf6ab5f7467 +size 40193 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ea197fa0fb9c83c09c5dd03e99e46a8c3abc59a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6c79e33d7b04855c940c13d0619639de750ea5d8fccad321031e87e5f40245 +size 58008 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe9e502d9c0f690fafbcf0fe18a88553cd5ba205 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081e9a52d88114c376bb795e9b3cef786751d6d50b9a4b3c07db7105ab061b0a +size 27529 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5faf8ac594ac9da5d4063214cdb64c5894e96cb5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8326dee4b93f6f292eccdc98bf36c1a92c89dce6fa22f5ed5bd67ffc389c905a +size 14763 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2806f132d7494136bf0c5c7a840966bbcdf356d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b88200c619da3344529eb6efdd342d3890d62242f6bc97203c45501b91f4d84 +size 45050 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80da48a6ebac18e05d5bdd53efec3eb9548f44d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839e0fb8bd336ad83016d236ccb7f74cc95310b79d1097a43578e8eadb6d6365 +size 53613 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e91191af6ab7357ab59b0c525dc94bfd77968cac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7488be10c493825c16b1ede1a20a5bec7e88c269d121f699f9898ed24147ae73 +size 72665 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a36dfe4a236139159adf8c2b8f7e7c8817e5ac65 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313aa3e40f246599bd8ddba2b58e9214875d3f66403eef02e230677ad8f3cd6d +size 34997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6a20f93dbc6c91589040307919bf9c5a82b90a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8755358458fd2010512307029f8a3e6e60444e5f8812391e75d01a48805a1a7 +size 39215 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5aa78587b108a697a331128d72ee0be4d95ac52e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2792fc854388e8b9afb87d2bd6c1ffb4e0a5a30154367b888616fa3655a6304e +size 77575 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_144/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38b92793298905b1317bb43cb4b46c917ef7f2c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d7f551f3e6062b99745d7959891443f9bb92c12f94b8c49d0a6464bbceda1b +size 21000 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2186b49842614a71203f2d5476c6500283142787 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7502d1c9abc6da94ba1b27125334fbe9117db664a241cf446f5423fb9557ba +size 17174 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b39f2d105e8a7c1a95c6386c8327a874ac8ff1f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af9889c7ddd4bfea250cce49f6aad808d56be82345e79b7e60788df4415c5d1e +size 75070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..335c955adf2be64c5bd24dcc90582454e1081f5c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3729640b60aa91646ec264ef0f926eb58bf6ac4b3f8ba31dceaf3529e81de680 +size 26933 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4675336f5e7732caa0d78aa6ac267c0f5326228 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c1692cd9e226b6f1c76b8151c8b9cdb3080db1feb6d883681323ef50a48d41 +size 44660 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7449162552c9bbd808ef79b99063a414f96a45a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c74f3d70b812cc5c8c34d6cc39a953e0741fee66cab2ace5f467376fe9d330 +size 48796 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8cf255e31d499693b8449d7e358f11b546e4b51 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329d5bcbc2c7be8c3aea8b2425bbda1fa00febd7daed968d8cdcc37aa33527c6 +size 62281 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77e91e2832aaa6e61b69e6c4a16fb6413160fa9e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b095bd342f853d2adc08027f0c82cb145218060f55ae01afc50a7398f030885 +size 77483 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b52027e49403a127d5fe94cb173909938cbbac1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87cdc1a8da9a3bced01ab1bc525d0050694c253ee3e5dfa2426b64ff46ca94d +size 36935 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a746327a3ab96b512f110df7f5b2853af4698fb5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d2abf49d4caf47d34b152ff9f662025a6959868c9eea416104ccc57c5f0897a +size 51936 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf2bfe7e3ed27f8e53b0e987faa209720a831015 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dac941b7015571068786e0ac39b8dddb4e29b5de8a10a8ebc2d31b911b5aa6b6 +size 74486 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..775b3d9df8054897d99e3d40d06e4c8a2cabb923 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781ab1d9a488d812fc9c27e4c16c72bc735d190316e4f0f766db37a47f7c3ffe +size 33370 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1ea44587ec03471bbd45a310f7a4d0440b75262 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d32011e6e70cd77b49ca99a5c2c9eb6770740aad9eb37dc1f0b0a48e50271ab +size 36235 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b26c1ebf91dd09113188dedb20baf9379e8056e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad413848e597d78b6e701155e5e91d9343570b68087b4d8c20316fba4c76383 +size 32234 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39b1c1f4e8bf3c8b105a9397ef39bbd72ecf8339 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aac5f362826d70ac33c7afd8a9f98ea639115fac7d92dbacd27b2083fb8d539 +size 18977 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75d11f970a05b73896ea846e71c83e3c9426ad95 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1714b607e52dda8848b4c8796840af8b89567edad194b01382a25778939c316a +size 20981 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a5f80881e5989d2883d552563257cb9c2ddf4bf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09973e65448851e6107a0451ec42f2603b4dfaa9c8b00ab3e76db241257b9589 +size 13502 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..263184e5745dbfb4b936ab3810c7714c555b9189 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429bf6ef930c241777adebc27280daaf0496933e2d5304aa590be071e5523ca3 +size 33926 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1504dc2d655184926a9c2784b36f1e004b4883d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c6cfd6ceeb5760bd9b347ba7226c98dd9541c6c5e0b624aed4ca9f386dabba8 +size 42329 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..552558f0235fd6a59e00d76c15c388040edc9335 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc21ad973a66e3cad726a9673e94f7c0b4a7a49f1eff9ba1b09f298b13d5715 +size 57105 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f357ae8504c312e584bb87028d7f8a3241742de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6a835c9cc813457b42668db4028812222efb70317c19a6b2cf8f8a2b6719db +size 27281 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd3ce45bff637b6c012ff5af10733db5c9d44141 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c713098404acfc043ec81069e2c5f9854775592702389da34215408ad20d389 +size 14899 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f06b1d7b9e3d0c6d6213b73569bb026cd34f197 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7afdc9adc8d66e849d70f78418d19aec88efff55552ade5a82000d45ef90add2 +size 45220 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7453ca6b82c77342f435950d04bab897e26572e5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42658c7c23fdc008b46a7c88de30827b09949ac2c1a64c63f6dfa5d1d58bd413 +size 53240 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71671b9e6b486ed22cf89585548df00f7a1d7945 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fcec3251a2b1898de19d441f3dbeb489d4df9d8ca526448f6c8524972232afd +size 72606 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23f5c2ed20e18be2d87e6b33ded98158d13a0341 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab900213a834703a41db2ed3b6ce022190ce92c6b785852baaf683a49de93bec +size 34823 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a73a643901fd7b74baf972394fa2fc4feddfbd1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f2ddfd77567f49244b22818abeb65c5a34764573cf911af39e919dabfea94b +size 39179 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0129fafb6d1d4c66ec551501fef21b8789b6d01f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9cf78a502344fdbcb4596ab3a82f20b641bd77506e98d04d2f7a64c6e1083d2 +size 80635 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_147/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1783741d207053975ab7e4fe226da85f9ae461aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2e8a615daa179b5573473e56224a43d72e46f15fa81881513b404b317aa8ab0 +size 20987 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fe34b3127a1f8758688767d368d04047ba6a41e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2559cdafb6a469bbe0433d4168def149178fb7aa5b1fd748cbecbafbf6261775 +size 16809 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1e7fba919aa9a887368e2e7361cb2c3b75182ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a45beb1882b4145d57307a94f22b0dade05905770b942686e1e982d6ed65cf0 +size 75533 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d870d9c40d0cf27f006ad798f63c9209c400d01f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53c55c519e528d3f9f70137ff94b463ad1f4bbeb8a7ea2658c5e94a8b370d86e +size 26830 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0793f3daceadb77ae6eb85712811a71c01efb21 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952f4740fb6b2d62a26ed5d730f22bdabefd283cef68822ad96cbf6e026ebeca +size 46093 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddd801e8c3c7dcb07b921a69d0afafdc5a6a77f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a1ee3ab8e5931f013fe80b6be66d62b311e29bfa42d0875c14bc73016208cd +size 45856 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22a2827ddb6aa44a6884b988715a6f028f88b440 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6482d6d14d88ea59cd31fd826b5a21886770d0785a7424be20803bf69d75b6 +size 63423 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec6f1eb8372f20ea0dbfddf4b8a1f580ae5396cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff8f66afde30b222e25536f648f32de8edcfd5d5f15d42132117fd909a186607 +size 74533 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3dc3c07121b478ff4b4e21e9d5a4d7af854c87e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a2ea3b634fca15edfbe234985bc5303bbd721a8377d3fabefcf43dc9b19423c +size 36597 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7275b3b003557b358e0e6dba507e14e14d781dca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f52c24815b60a9b5cdc5c97be3889de8ff0df1308e4b8ff5cd58da9652bbe2 +size 51465 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a8c7c6e20c8a4a66b0bc320195c602e9821e448 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab612ea7a749cefc80e39903d53dbce77fb24e09d26b3442dcb778ce7af6770e +size 70316 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76a8a247a20fd63354546809e1e564c97e2dbac5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ceff7fa935435a74fb92d0c11f6bb940ee48b4d340ba7eedac30c3b78441d8b +size 33258 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84a1b832665d240e995b7ed20b8f474cba82cedc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e34248c24914ccccf90974d11923e1bb7a18abf9d78f27116f7a1466dc3c183 +size 36375 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3ef337447ec42a3b94a4439a788cc3c758cf3c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527fcc53334124fcf6c48aa3a72e89e91cbefa088c6ba7ac91e8702bfe4a8e60 +size 32709 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..beea9d3a26a6797fd5301fdf4364a8f899f526be --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ba74b601c81019755ce1f28196f6932fa0088ffb3f591ae63785f6f8dc2ca11 +size 18977 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf0cf13558680f5f2a64926681a8a6c0ea74990e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd403426874cbf46bc446fd0e5846ab199abb85f58820df96cf8b4e920ed255 +size 20830 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac9832742d16bf5c962e046c0ae9adede75c1fc3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d53cfe4272f22d3f9c8fdcda4f3aec89390e2f1df151bf86435ee428c2974b3 +size 12569 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9abf5bdcea3ff1a2e80476baed1436f861477b72 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf40cf425d33c3585caaf0f9dd8f147d2907db6d49eb6f7faf646ee001659b49 +size 35465 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b7244fb73e0ad7b4296ee2f346837459aaa68ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd5d53ec56cef53dd0fa2a2a8bf5ad7f30223f113034d6884c3db8c865a0230 +size 41371 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30af7c91f01db2cff887b2c1042fee4fa2dbc26c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ee937e4e517bd253807d85b3a0c9244277a6fa8488b0105be242b11e1e8970 +size 56623 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61c26c95aaf6f86464665ca6192bdbe4dbe48c5b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a55c5b69884edfe8e386e4e082798a5917b3a1466a669accf2d17ae9c6243a +size 27844 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4847cc1abb2881d32eb3bbfecef97409fffd6b65 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d9818785dcb9040cf80cc2232d7dfdd09c3bca3414fc300cf548e16ade658f3 +size 14795 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bd8ebd8a1403d79bbd163293c27ccb7cd983bff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc238e1451b581192345817f88046d00d1d847c8c7203cdabe68390096ab7602 +size 45261 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdd753ea95996e9363375a33c8f2ebc924da3916 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c5b0dd2e16cc88ee8f1714f808162b5df685c5998ec891fc30197be15d06483 +size 53308 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6241a130f3c18b040a00933fc893cfbab6328a2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:849694ee0acc59e8c75acdbe2554ceffea8955e9f30647bb37c12f7e43a48e6a +size 72909 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6818d1f8cff481fdaa4e9332099a36f4930e3b46 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a01eb3e5611186475f61af04462849fbfd9e55d68050e6564609c5068382dd02 +size 34829 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c039b6b45a730e2b9ae47f0fe2ab79ae22507583 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e8799f409e1bf0ecaf7a8f31a75c0e844b84c7f732dbc4a049242b244977a2 +size 39090 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59ada8f1d82cb0bc8a71018a611512e57cbd48ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317b1433ebb48276215bd81c660eeec79fcdadab4ad36404a82f2195a29c78bc +size 80691 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_150/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42ed1bec30b265e349e0aaffacf01ae9ebc92625 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e4452516870012e44cfdce219b1a8ac7b7766509454d67ce117c60dc8f8dd5 +size 21002 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0117b745a15f9f350cf854ae74955412ee3d7afd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d188a31a69189dfd572dc6ac28048068f80e7533c94ba8d4d26a4c964e9029a9 +size 18407 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71a994e2431aba35409839a1614458c617e0d9d3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d13ba8d85e700f3026197f92e5ef13594e5a8698796c11e69fd21467f664d22 +size 75960 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f26fd16e852bee196099261c7320b66d70db6343 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82da07b94c46ac0087f3d231b5b0d3c9e4fc6623b268488430ac1b779bb92ffa +size 26825 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f5e4edda98ca9226d5a74111137fc3dc7433811 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8642d0007a3691b55e0708de3046cdcaf73567046bda2d35008259c49120579 +size 42653 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..924f576e2e381cc89fa478da6ffc19829d44376a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1318dfb89c51049ee1201a4836137a8a9bdda4ca97d55fa72504cd828583d0aa +size 52928 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbe596f6375c791c129840d707ebf4abb7636818 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bcb9648301ac4fc64915445344c161929029f6e05c04bf18d555c2c9ff8f6f4 +size 64109 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60bf13c9bcd5a73ab78984416d25831011c139fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84d461489d224586926ef1784e3d6f21bb32605c947039c1ecf6eac61b97d47c +size 80057 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ef949d4057c90920ebea544a1da6ab70d91d6dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f00aff215c0d492f31796e45009b6b031ddcb40f4beff30cba95c8d9bc0532 +size 37356 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..270677dcd621cae75b7fe582ed2471b667cd3f21 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1544c215ec4c24e267bfd27742bfb0aa1700686037590544510d5428e3eda4 +size 55462 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffb330e87770f2d5e1117e2705d6e0c5312fff82 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f919aa05a0b7b9012d88ba1f8901ad3d873ca8f00fd43fe986405893d63067b3 +size 71932 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc8963953ba1bcde525daec026b27997c0b3b836 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee11011ca87d34e2f7dd4045991d8420919f09eb6e27041ff0fea284fb904bc +size 33409 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45d51d293a5d9f79bd48c3888208fe2d5677e851 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89af5a76fb9c6083bd782920e112efa148d996ffd42de617db4612452b59d391 +size 35026 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..669dc48b3a4bc100239847a0d633d5acabb5fd2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21be311afbf1b5061fa352ccde46c4d3e937b7bfc642e4506c12cb4711ad9c52 +size 31786 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32fd77f1af83fb6e523f503a0a867d7881b670cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc26f022a6b901274c0b9c32eb1665657b73a7b738886f717987ba0919744e6e +size 19618 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5058fd0e1d32df5311c50c97436efd5411c9590f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:239d14511f7c8b7b1a4f5d65b4f773ff2d642e4549acebbd2cff904c84753da4 +size 20792 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..458db2937d182050c424d12fbe4eb1c07999fcaf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc1317af6a440943dfe4aa7f39aa476f2744215686fda2368e2332b966e56004 +size 13807 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5020dcb5799ffaa5dd8cc193141825895876aa41 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458ee96a3525ee00e4822530a98d94bade2bcf9a8a1262837b4a0bb4d3ffaa87 +size 35868 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47c7487842db0372920be1d8bcafea84afe2fd36 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30a8b65dbe23a23110c436746220f6cb33bee32a13a2bca6f654a81ac90e102b +size 42186 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b5257dce0f2302dbbeb89956cfee0d02930d616 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65c3cab5a7d26ad61d011f950b5be955f3cfc73e72eddfcf1428985ca6286419 +size 57476 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc19f9d48528eaa0c52c462ab1863a3720cb4c3f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139aa47efa986e416a6159a1e3c881ec181e6f1960c7a51cb7e582ef6bec4c16 +size 27082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1716e4e7a5274e18f87302f249e45f7cb5ba86f2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62592f84ce194fbdce50e50266d276a1e95f77e21a36a6c8ebff6caaffe51f2 +size 14810 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae8f3a29ec514471b54fc75495f65a87043746fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb501e4e6d1a602aa9336c3da394c954145ffe1dfbcc3a7405764652c22a2618 +size 45256 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab84e6b98f6c8e2a22d875a4f203c32b21103b72 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221de3be341b087e48ffc0644067565b1a6d91d9d1524b98c23fa454c42dcbee +size 53537 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24f1745fc825e1806267cf63cd0d5b765df52265 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cea5e776f9625a17e53962203256c4e902b5d0166962ea07e2d56386e53353c +size 73680 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b959438cec8ccddf3e986bcca769665edd5c790 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42832e604ae124da6bc2afa972ef273630549a8da32390b3b42dc944d262830c +size 34815 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c05a3c16b44e7130c0a7952f12a65e18226bde96 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe81509bd397a2b53161d638155d6ad1c06bd2216ee40324b08fbfde9ab0de0 +size 39092 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed79bbd7344b2c05efba7db673fb18e148e1878d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b7d3d8c315a7bca8434ccd80279f0eaf30e15622edc7019ae7bebc83d945a0f +size 78158 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_153/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8a58d1d3029c6a3d2c671c2baf48dae927cf9ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0153185e97a626eb34a97e77de8fe3d363ee657723caea1f556dfa2d8d1a4024 +size 21012 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbff0591142a9a924756a5358c8eecc613a11c9c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c845b092bb69d199997ae824173f1542c514898fbeac009e1d2d9ec8ed8a30 +size 17797 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebe62297a93422e1810f553bb020c2d6da0be21c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2913be34e5638ade622d8bac00208d2cfc34ca09544c3449969f9ae35524a228 +size 76224 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..133799ef419f1ed79d5566db36e823bf45f94ae9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d16c87cd2021e15c7dbe6d181e14804dda8dd27df2cb0cfc3f8b736a252b5f +size 26946 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..913b6912b952236b0dbd2cd416a6fc9993430946 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf6404dbbdedd2fb0ac6e66bb0d341f71fa21fbe1498b7fb6946d8d7595ba27 +size 44619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89d25abc9ecaaafddc37645916f96fdbbd8801d8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec58abcaf09002b8a5776ab57dd1e7080c3084328eaac730f14f9b37c340133 +size 59415 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7a71b97bf7878bab2a00b2db59678006daf7c5a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb53ad606d299c14525bbc1379e812160b8c58162b291b6abce9f11ebe2cc453 +size 64594 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7c30e9d8dacbaa353f775743dd0d155b49e5bff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:449e0d258df966c4e7257b654b3d17e929225c9fa9811e04ce860fb87fa7b42e +size 83579 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13b8a8144ba5965bfde83e855be9b278b2885599 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d321f5da81db90b70e91395fcddc55ba190fac0e7887c2f59d126c34a21f29e +size 37137 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b12af360c797f71ce1ef91701fdd3705e03a05e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f80312d881217f6a9ebedfd61084fb97a57b4d56f28da478688aff666c187a +size 51039 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13cf67ff10902fe717269c97942afe56ce728e9a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce3e7ecfbf48251ccfa655bc041c42cd4c3341846244401606873c03864704d3 +size 66674 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..106f599b325e3a71b2cf8981fc0433a754766872 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca0f56827e368872d4c29925c67663f16e5a68651a760ad22adc40614c0f3e2e +size 33317 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1207240aa4b755476a788a8db00151d9f87d864 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76a5839ee12d00af1f8d5899a183b91cb6c4da452ded149988caa2d667fc07cf +size 36732 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49a44cfeceac9adf3406250eb3c90c962f8f8fea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e88269de5613dde2e976d868d568246178c2fd5544ec0d23cf5b3a2ea93473ce +size 32604 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4907af1820777a9d540a116a783f907193be802e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac719d7efb8b9bb91815ae7199afb4a6a2a35dc5851f0dbb7aa357b2ac28fcb3 +size 19357 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2edcd1ebfecc09179f02d00e9fd0b61fdd3d6daa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8becf1255645806e9441f7717db2052e8b54748b607ff0911707d2d60c7b462d +size 20850 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29b0cea39cad19aaa97ff762e375549d9db2823f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07f65ee08ccf449b9dbce0e7a6e8d216d0efc410ccc95b6d5e609bba62cfa2e3 +size 13610 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aab63446a260ce2e14518717c1ebb8a4e8615a3c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae9b61daca7903752d7c2a009cf154a914137360787dad545ada2c5de63f9c67 +size 36322 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ecafb7c9c0a1d02b951432dde20fdfedb98eedc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b17128150035e323c6bbe8616075b6f96d9b723b9d3c2856fc1cec59adb7d7f2 +size 42385 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2bb7fffc22016ecdc5cc6cb19a37f553d5a2a1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876f3f84c6cec4731cd4a28f64fbb4bfcaa13596c3ec5af98c13afdb1852c840 +size 56684 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7549ba8fc7766a2b0932ec8c1c70a768d1e3a8fc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7716d3f1a3f79ca9f6326589044bc3e83e9ba71d09239b9e80901d8b349eda98 +size 28136 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15b0bc14c9c79ff95bcf7ce32201b2d2b40d44d0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ddbbb5091099d0777187935b2e9c14b3e147c2d17bd3ea2965452c4257be18e +size 14736 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef08f15763e7955f1c7a056cba7b5d7e50c7ba3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbbc5ba59c31d9efdadb02956d0fc7b3a59644fc48017ff6d0afe9969cee79e9 +size 45496 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f264d612f83ed98d5ce93d56c4e1aeb95b11c3b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502c7942bc0c80b36571d6cbbc56add0b853ca60bfd38d29b667b93bb1f1880b +size 53803 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3181417a82be0c7280d31e1984e04e849ba1e2cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b94c236146ae0d27a1b10fcbbe4486412616ca9aa5a6c98e92b9cb08f078371 +size 73989 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f00d8d605565cd6df9ecb86d361900ffccfd899d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d9f2261e2b6a321ce80fe66bef985dbd467e9d4e68432ca7127f834304c09a +size 34819 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9839227e821845617bcddaabb8ca7c24100e95e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c58c6328b69ab1d6ec18ba51c45269c682684ba2eed8c7f4e10886dd442670 +size 39128 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d79acbf9f44c6a8b6c22468553c76f379d62e7ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d7879422deff461b5f9027e585a4eb2f5f1031e435b6fbcca7d36fe8a70f38 +size 81827 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_156/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae88cd54a72781df2ac673864ba7a558225060da --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57162f880c72d42fe897c3bae37381dacd150b5c6f5ca2ff37c715b91d35193 +size 20994 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f5ed5725e20e3d2e2c350ecf038114bd07fbe59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb625d0097682dbb2d2009b639a1de0250646f398cd9be87540dd26efb0263b +size 16869 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb8c860d7a3bd1102c19fb8181035cd3de6bbab0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be39415f666abb2c6dbc6d026e129db01cba2ef438ce44e1185938ebf22c2351 +size 75635 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4997e4239352f0fb41af313f9474e89fb42ee3e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1b8f8b7e660447131059b4df0246a2f1ebe404f7aebdb4b51705b88ad46be37 +size 26822 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6db0fd7c7795ae3fe6d91454d21eace152100805 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:901211946fe2575ca5c327ca3d81a274760a0f926e9565a05a35f5edb15109e3 +size 44093 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87b6c93b8f9303a41cb7c142296553336848858e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d94d86165bb6ca67e73fb3a0cdb0471a34848fb6a37a75efe344e53af1a3976e +size 53937 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2dd12c29d9805464ca8ca0cf1e4d412db1dcb40 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a0b93a78fd13d5221248c695028c679bed6accf6185ada3a95c52a5e8f1dce +size 63891 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a61a679d1c1d57ccb846854fcf7e45a69f92e1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9880830da9946fb5367de7703372731e0628e7e2483b319a099f99555cb54cb7 +size 84363 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82f2225eb6d9dfdf1559598342be90d685efc0dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:595a2f7893988de6b61b4ece92f31961848c3d1e6f61a2dd425dc8bff17571ee +size 36578 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a2cfd7a84839d99b643558a01551a2f183c6cd2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3f8c28b85783a5f23031753d0a188cb1785b39ba97c26dca24060dae1cf292 +size 48952 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3fd953606c79aaeb9dfb913a35e2e740e5193ac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60cfdbd42a3ac76fcae0b49dd2fa26ff04030dd86e0f7b38cab742955225fc41 +size 68533 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c9426a8d6338eca75a44fd33c6b8e2fea8e3032 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f821de33a8377fb1ca175790ec7b84d6d1547b194e039cc66927450293e09430 +size 33137 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..527a5a79bf1968013a1f7f0b5f1a5fe54fb49e38 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:020c1de2fed240a2c31d99dca34507709272409ed5189a9380371f477b488a61 +size 35315 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3ddefce52c9785f30d222255362f4553e41aa80 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe6ad2212926616e8e763c714b72af1662dc499b3f3795e96fb9c794f1d2df54 +size 32424 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43122244ac2fe479ea73b4e8662bed27e30cc987 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4b67ee1af864fb1434b90e22433f2c788160db0beb7ac40866a63dd4aa7a20f +size 19468 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48f66b9aa2b321bee93ba28082d517abdcd2de23 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71134960fbebc30bcceb4d9ce8d1785ce579195bfc49a5deff46ce81d8c7c0ff +size 20941 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2df84ab0ff3141b49effdb1d80f2893b82dd8bb8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2535e629db5cb83bb9d0f8786e6c7bb147a00a847eac919b0a64694d79e9d533 +size 13447 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91b0bab1195487afada9052565eed0733f7c96e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d34ebc6d763791cd44fddb84575d8cbf1f73313a01c2c8ee07546841671bb6a5 +size 36171 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68cf9811d4f3c177e6b9ca1ee415c0081ca00d24 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854be353a0a7612410f6ea74a34b7d43afc9435a17199f163e11cb0cc4d8b88b +size 42304 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67c4b35ee7bb8b9c152ecd2c49e94abf1fc10fcc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:743410b2babbd0433e960dd9718710929bd78a374005756b2cacc64ccf2e171f +size 56998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d920e168947e0cd024646968df7a7c9cb538edf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e6bdda3d73ebf3dec8ce6b2999e9e6bff63276e1354acf032e0dc04cb60bcf +size 26026 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81d1f572a2bddcd4d275b83d6e04d1aea8121a6f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a218a4fa8203ee3782b1a07ee1b4cc72e48618d93bb7ca0869d2b2aab7ccd8f6 +size 14710 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6acd2f618a18db7bd2c23e65a7e0e526e5ead8ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7fdaeec660a9d15a769fa9e5200624ffb266e515a92aec6ce830c2e5612180b +size 45398 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d483f60f1c9de7a69e59f79cfe3e3c4e40a57e8a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c890ff73158869e59508694f7c98d962f9c37030d3c3c110fc638582ab6c73 +size 53575 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3a791e76070a31dd429a7411a41be6de711280c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082087a4a7b0a4642b6c66eaf980a2577430da0fb4fa0249025c1f8573b66d8c +size 73231 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..964c1a674faba8439845e389bef527220ebeb938 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a4c1dfd8a00b46900e2839aa507df4a899fac98036617c4f1d536ec297a7d4c +size 34794 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e74023ffab1dd72c18535f0edaa483dad85e2a15 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf29150304c2c302f2bd2e5596345e1eea9baaa9ce2e980bad8049141cfb4cd6 +size 39136 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6e8ae0b7b5aa733e828474f82a4a5cebb325957 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ba97ff3cf84697473195b9a156aa035d944d76ef92690102d6ff950c42e1690 +size 79014 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_159/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f838862e1ef459763e6e8a034b64cb6436668b74 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380bf6d18c52986b56a99e8e83c487b9184ddd88e7ffcdbc14a7dea87767cc47 +size 20976 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ec7fdc93fefe07b3d920e8052b460a9c568ad9f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b9be1dfb8380121a267f57b54a1d307754c53273bf90141198439c9164598a0 +size 16557 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7903f6d0d1ff9d4bb9ecdd0babe559c5db6bd8f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:784b7abc3e8a195079ac88e24709051166345d3e3cd30383ef5b755b4ad6a5e3 +size 75199 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea26c0c1bc91592ab2dd83a216d11573c669b0dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30621a1b8d7603eaf1a0f6061db8df30b08cb5b28f5f24e5b55e66c53c0699e0 +size 26901 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2057611f5991f8f5dffa6b3e278d68371483376d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3085c75c5f264f5a04612b919849eac1376ab7d2e96c50f5f18bbe81023f82c0 +size 43158 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45d7a6d890deb234e50a480374126e592745c235 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb27734800735f9b78ae4a1482a060e4a6418d4cedd196e706b40ab06d25961 +size 48310 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e69c83f86200227cab2800c813432d2941d14d8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3f187712a904c12a04ba29f994417275e7acc6938a799c95e7e951a9863440 +size 63650 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2db5a0f570013fa41ee3d6f156693bf1b61ea2f2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dffff20bdf44bab34f17f65faa5e1d3ab3544f78d18f90dc90a1b3427b87546 +size 80722 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8be883d5568e25d134a6bfd59515c9d94c7900b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:792e9d79016a484815d7d390354000102d96ec6e2f8d6de1ca0f84af7bfd8ddd +size 36706 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbe7af1646612d9f2ef93fdb34842a6ad3a4ecdf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea95e4b9715c59b3f6e2492e29f6033ac63107149781ec45db6df5062a79ef68 +size 47904 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..158d71d3aac2eb8ad2baadb39f71610458c0a6bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2eb1b189b5db0c3eb6ddd271dba1567eb25f7991617b2a40409c11dfb33ca6 +size 68083 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66558528d93fb24a95442baa4b73f9d1b3d48510 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d4e6c0dbd1d8db9705d6bc7c964cccab8f77c072f9d4c379b02f036d3e65ef +size 33070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4610cf43b92a43570537cbf4c496fdaf8c7e788b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:412208bc790886081cb927012943dd1f5b30e5d0f05f03cddc226a4f90621ae6 +size 35411 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..514df201235799665997bce242111002284f8e05 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451a48043a90a9a0cebc7e46d7bf9803fc02daf43bfb36ff91634c9249e3b086 +size 33095 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a0e8bba236ea2c54e75881ee21d367f1f0c1885 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde1802f267ea4e0f9e977b4c9821073f0c6c2680019aa571eb9deeb0f425493 +size 18658 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9540aec405c87e4bac95bfcc56dca748d295a7e0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a076e6a4947f412b0964d80716547eea8f39dfcc4e1b2513734f3d7f9c644e +size 20899 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e9608d322d45ac3ffd109ff22966475b4234146 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebc8ba5d16f9f41e45b0baca22c52ac8f550350f4a63df8a73a4ded86a630ab5 +size 13604 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..076bc88553d12f5bef5d1bdf222c1471139833be --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f772f3120967a2feee6f009ac61dde04a4f710fd18c4bedffdd465f5625381 +size 35938 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68f75f5cb077131dc4723f48a8583f16ed749e84 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd3ed50437178b00791577abd40ccd49c6c4167d033d23a15294536294d8414 +size 41882 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc050bff01d72d1c8b380d520af727da0980a246 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929e4c392d4434c0a69e58d0bf52c02af78dfa4b9ab4ba077ce678f8a0daeb05 +size 57923 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7eb8e672bc4f34d830a542cb20ef618b55399015 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a388362db1f2b486c1867f57bacbfa845d7909050c1c4b4717d0bb94c28873f7 +size 27585 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95f94d567bd9b742c6bc22491bd37e80417e2443 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6f8ce71fcdf0a7d9989fba90184db3dbebd4ef67c91febacd58bc6f8e7db7e +size 14805 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7677b131c916581c1ba210f434b5a3ab36e86d37 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fcbc26c6e0a9ac5c8f54638f65e6dc1a24c5a03a77f8db28139d4a6161c375e +size 45499 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21c678b569a322f4a8176cd5e535d5ea15b27160 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ded385d54f0b13eb6a60c0060546e4cef71a1ea0d170b5e7b13015fc69b7d5 +size 53256 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2299989e1fe07218225b5365fdebfb068f57e0c1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d11c2710edc2cc19273c26e254d7e6e14d98d8480926839f4cbaeaf3c656bc7 +size 73264 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59c73c5bd9d20364fef6b4126f76cdaa6d60b11e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc7b49e06d155cdd7f26a29ffbefd5130051cd9ce8f8c5e4e1c52f10883df673 +size 34614 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a90f3454cffc86ddebfc21ec43d36d6e251451dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4963c9df0c7ba2b011ba5896e28315f595bbe6adf9c81d6bb8ecaee5231a3b +size 39214 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f040d767df8c63d0f4ff7c24d30d1827114f89b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5a23ad31ea6be7c6886c4290f7a58340bf8e4d6dae9625c818a29e580652163 +size 79684 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_162/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ad864d980dacd0de3a41ea3997a0eea1ec55539 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d147be9ca551c81f0b0768d4d297ed1833099a44c3648f4651de204585016b +size 21029 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e33114cd5649def73e9b42206798f03d560576b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ed4fda2febf91e221f3cb5671930c8f6e242cb8183f1e3772c0745e36869fa +size 18089 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..330d8970752439b35f102573ad1cc1275997acd3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b10bf7d1daefba42d28265fc9a0ad7a01c8fbc8359f5673be69728e07c13f912 +size 76125 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96189772b79d948d5f636111467adecec15bf373 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d2291414cb4af314e831c4b393adc3b3c686058630a174604d204f479f3ed79 +size 26905 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ee63962c45d89b0180d2e95cf0c4394c6f12b18 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec51db813f999f4521e0e9a4ee2c81dc56ca7e3fada7ed0e7c5680666395f4d7 +size 43883 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a956a6b856b7374480cf81e59e9bdab95666fbb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf6b6cc7fbfd1f9e3d1fe0073e9bee29bdd05d832ea9562e55d61095143c4f1 +size 52718 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24ee29c68fca9137c76b89771aba7e28cb028669 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46d44a42d86b2bbc836da060ee7ccb9cd934b75ebed7790aee7882e9e4ac246 +size 63644 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75ef3a9ab8b536d5fc7f9bae01fbd6277de7ef59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae63ee3dfeb269a3e02383d48f8f29020509e2f3a0c1e6a25a2e9bda505336c9 +size 79465 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e21afd704858fd29a5dc6ed0bfd6aff570b72599 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a31035bbbfb9b24cc0288fb07d7406d308a94034cc75d8fa3b3f06696343e4 +size 37496 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..306554b9d3caf3812e7cf303ea78ca0dce97ae53 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb30403ce03c18bd9c3604085f6f1dabfaf8a2ab836c72ce7e089a535e09172b +size 47829 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b6c37d30664c10d7e7448e813aa24edfb43606b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd68f1b78566d6da579b8e68da83f75ef1f1affa38e64a504435c72d2b81da3 +size 63234 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e4d01d5099117363ae3010f02639841f96b300e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d074eb8cd6b3a5ee50e9a60773d47c4725eb337ad636a5287a8ff030aca450f +size 33048 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b85e360ea24003e26b7f2771ebaac549be46db6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecacac47dcd57d913e25247499aabc13f0e887b24f0f525a3d3fdcdc53c047ae +size 36099 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..891b22069988e794daee1b71e487c8398718ff49 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebdd34c5f72d293e5544381f313edebf7f44c82138bf4868a07a11788991cbf9 +size 34147 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbe1984bcb955b40e7cab4a5c9debcbd082724f9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8513b068cc74aeba8cb69896c5b78952d811e663ebc1cb1c57cebfc319433604 +size 18642 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57d37e92d8b5b5f0706e043161003877c6cdd630 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245625296432f84928c6665b92160a2de3e443f3696c4c09ddc414bef87b58e5 +size 20822 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba3e36283ece6a7a2a8ed5ae7325fc627afce857 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f89f742fcca79afad59882e88d428e80fa8155ee46c1ed7cb8d74bdc3b31ff +size 13493 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..119ac99902b33fb72f92e20b754c83b385eb56f2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac60d133319366bd7e5391d2d41c823bbb93296ebf0e0f7138d76ecae01e315 +size 36012 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d33d48057a0c71d47ed02dd30c6c1467a991015 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b3d2b3131a57310326105adafe8b956356afe3a949ce7be198a131f5fd326d9 +size 41899 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a2844ddf22d94b24c6a0e042a5be4ce3e67d58f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85a88e54c417d5fc905fe6bda6bc69de04842883c1a1feaf7acc3c542903d94e +size 59168 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9af879032462b7c62b7a68ea833d123128fa974f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7bdb5c796c4e7f9164695f6ca602b356b7a083a08ed8ed84815ce696b49364e +size 28172 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c047628a73867223ad81c855f46497a621294d6a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e25e37dc6c7a1930bd041bce766ec82022e355d5f08ddc073b51965996c4634 +size 14924 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da57a24889983b72767af4a42c4e8eac8f1086c1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae311089a02abc3bd2df9924c58fa660c01b5e86f95a30a5db67664b5b8c415 +size 45217 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14655170624a8d42e83c0bd61709dfbccb7143d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d63bf081f4719219770e77a61cd221396f26cacc0b474a17732f86f151b5e09 +size 53816 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f9355d49de1c1f5b4f11de7b143a31262374603 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:818c86d5bbeab208a65c78a2e688406b7732b0620059a0ebb67034e320772ebd +size 73839 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d27caae114d4a7f8cffd1b8568f215eeb2423bb2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed48d89beeac628d7fc7dd569f574b202dd15011e95da48cc55f5a44af200a8 +size 34756 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d93aefea0c8fd9eb83d1dfdff6af330d3be5fc8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd24c538f91f7639d2e8e336e22a021b7a114096f958c2b23837b2626a0e4ea +size 39209 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbabc37b633bb4ce7cd5389e60e74255d478f5b5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:662f261e21a034a5d00a7faa2fdf1d7f251397220f3851a6a52cb6d9d9b39736 +size 83931 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_165/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..638ff2d259577df7bf8e80ca067c9b4206af4c5f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7589a2d0b371fba6314b80a74165e53b8773705c7d4b28380b1ec86aeda22ea +size 20993 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27ab1a88a120720324162554b1b4f9224ee78b1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c31081b4aee8d41770e0524a04463f70da17b2e49eeb68fa43bf9520eeb79b +size 17673 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5eaa0587cce39292e5338b1f847ca6916b768b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d0c7ffb36f4814ad950ecb7cc6c5b35efceedecdbb0bb1cf83d20ab81c8aec +size 74305 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1497d4d783199408a9e1224ce2863ca59db7160e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:396461281373dea6c5fed04a2fbc5342f95efcb3c648b547c12637edcaf62e6d +size 26989 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe1b3cfbb73d3481f404e6f9b78e79422e6a63af --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32acc4d27f34cddc40276a276dce26ff0b6a1342234e704728fbb8294214057 +size 42143 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35e865328726798df89dc9955432a4cd8fd4e876 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4c1f5052305fb4e065dd4045eef402d167e857979d16dc9333c6b8137e7b99 +size 45919 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12c65794735853ec943979660accfcaf2edf63ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48735c2d9926e9f5ed580bf9937d0a58c954d3d80cb912550f09bb83c13e0b9 +size 63891 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b812616f2cab59f408ddfe0b02505a8a454c9cf4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6179e87c29670da7f5b2258298f5195962bbbb5bf6f5c7c9e65c869b18cd27c +size 80347 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebc9df8f847c094f689dcc1f35e0abb60d538e88 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d3821336b75c4842961bcc82e44d03fcf3c970b1cc309e7b793ca38337cd00 +size 37173 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba4eda04492cabd0b964dff4ee00e32f1124ebd3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccc5980588c7b44663b5dbc49c9f560b2714e0521fde31aeec07454a4703ce73 +size 52228 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12626086c8d6a8d28c4b4e0893ff8584ab3bd059 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f4cde29a0f8006cda65a07781b333d19182877c8a3320fc7074559fde47871 +size 68266 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db57395460adf36cfd667ffd8b657b3ada3e23ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608217a62d55359a0b25ae6b801d229c64ae1849168b714741ca70cf63dafb4e +size 33291 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6fc3e5ec41077501eadd861e50dff75f8b0d814 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0d2e31a9ae08663fedb971fef04bd60ba22b5837e60c1f380bc737c86af44d2 +size 36016 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39751690c2a4f1df220709f7e151b42e54ab08dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cccee327d3a9f088a618db06818ae322c7276905a8f2f4fa123b16e11a1ef84b +size 30315 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7cf3682781d5c259a541053c093fe4121e4d9b8b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2a553798094f9901a4a9a4449c290354089f617f4526df2704cd982c8db400 +size 19399 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3466867575c66c58add6e0e54d1f5dcb2541c57b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f28be855209b96ebae38120cc82011d317aaa8b3500de37a892787566135c74 +size 20859 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a6d02e5796519218e97809244a24ae5523b33a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6eb9462bdb408dff66a8c641f727aef99107c25595f108ecfca73f79f13e2f +size 13648 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b1f643e02643e93328ccf5f6039887b2fc3e5c6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72ccb103f64fc87a1130ee2d92306c3d60abae84b66f02c54575345e27d42a83 +size 35288 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..795b85739ed24daa7135cfe90c609aef3d68fd2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68286cbc7f9cd7c2c001236ac82d596dab35f4ad51e16381239d522f7468c8e +size 41352 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f056bdfff49ea08bc37b9a362e19db94d908a40 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6462290d95ede226e568c2f2d70f8e4ff0f2542797dffc1d01eb8191601a6ca6 +size 58468 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd9a79f3ce3b73f15265f08bdacb471594b8dddf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e53716be9e506a98e1898b03f9dddae9cac1f62078bbf4f11ec5c99e524660 +size 26622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c67d922c27c72c68bc546bfc75c466238f0d54da --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fb71436c0526e116592b8388810052639f6ba0df5984a6266a43c54e9afbbb3 +size 14826 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4a0beaccf07bea903ce37d280a85a1f8ef3509e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a0f45b6c067b70748cddc960560a532256eb841c54bea153f9307ee879c51b +size 45359 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10607a2de0c55e8f81c30372c14876df0f6cd9b2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188493988cfca5005af4884d8ec708f6753e32889fb0c24e35664492db7699fa +size 53974 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec9b8f18dde4df1f609b4a7b476c4d0a31cb79c6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b94c49c391d03a994950576f9116b96f0b6b21835fced46b5c6392c5cd1dd3 +size 73826 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4136dfbd27bc7ee6fd0a099e7688271c0dde6e1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0973f6035cbc9904104a242a9bd296961a59190bc1e04dd49519fefc7463af72 +size 34921 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80b7faf4c202a5b4b16cb536879f912c997b762a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb0d016ff31cc760ea555edda5d57cea11fa3fcb21184384ebb07429f3086ab +size 39228 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2765680ac908c5d508334593f144f341e3409657 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90b317c31e79adf554782c5d312d2fa6fc467e46ed3697e4b5390f6f3d0f242 +size 79266 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_168/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1966580aaf76393023153ba1ad7ffa8010d0c7a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb364e262689cac9d395a96fac720255c99c2e92abb4edb002d3ac18cf29609 +size 21002 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0ddb68df5f54057d6d5c59322eee4bb3be3452d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19411a29a1330c2db930f067e132e4002a2dfae2d5534c1e1d45ccfd6cff0650 +size 16151 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84fe255e845dd06648d58e8d4d2e89528264452a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4816060ce9ea4580980d2199d9e3ce540ee2094b65bb9d28e64666f4745d92f +size 75276 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f6a46bbfb33b01db783c47760e42e380a738ade --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f09740f4b3aeec11b41b2e06c508fa24df8a7ed025f3da9eeca1bc7170807bf +size 26835 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2944b0d5459ddd86603c9709ab40bbd416f3c160 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9bc502d5ff6bb415ac30cbfb6f663fc4e78b277ad88e9d357811a0df00ac5c +size 44362 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc7798640de2ad423143b759c9a8039023f41130 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4e3711cf1884e0efc0ef7a9f205724383b70638d01dc38bb53c809df11c1d03 +size 52724 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a00f338c678bce1c078abea3e492f2a771aa6c91 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1887f7db72ea8f6d950dcba43aeaf115db0a2edfafd1d46522f010066b4eb547 +size 64376 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c4c70b18fd9497ce0c428cef523e04745ece2ce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d47095ac2cf16fa7d3e689cc05da3f1b2eabbea86e54a1a6e9e0091136c0b4cf +size 81039 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7cbb3460595adc053bca2481e6c9625faec1fd94 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2962a5e41e845afb6603cc0ab4a37ec81ef7a83d8222438d9c75e44310ddf9 +size 36867 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40513bdfee8bf72f0b84924a4229ed0eda6b9c43 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f1f7feda448a4e16049e27033d2d1e5d6e170b4fcba5aec4311749b524f377 +size 51560 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f048e9f0e52dbf289c04ae5967f143b218efebc8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00c1008ac9df61f65beb779a480408139056d4b5ce90406562a44cc7e72f2dd2 +size 74471 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a77abefa6180824062087a2fc1bd4047f4cb736 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21fc82f4d3099a80ada651be10f7ca627a2aae9a8e961e09fbaa18be9f02714 +size 33242 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..926fc062ba629956d66572bf445a2f6325da7cd6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a672225c6a6e56fb77559fa8182de39c22de12fb2e3366f7625adb2ad5dd4d30 +size 34391 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59575c6f4045769c8f63a06a6950e7ae9e8fc390 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4498d13c72644c15fbf3da4dd21d3fbfa7cc710d268a9aaec636660110d377f9 +size 32606 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3fb2e127e7cfbaaabdad46eeb9e7aff2dae6c7d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e47f82689f7f93b38e85d1e38032f4f431f5c83bf0b36f68c87475b1fce5374 +size 18999 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce3bc86f0c04cdd069070eba1fc94ac9a4c74adb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca82138ae35fa1cf4d28a9d82bdb53c71064e4c6014ec3b24e4de8cda029da4 +size 20890 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19e88174421bcc2c35c170e7a140802012a9226f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2087bfadedf25596ced58441e291f1cb3d2451bc0fc3f547fd2bf3b864c07f9 +size 13476 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb2aad51f6a5153372006dbeb994ff47ef70b745 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:815c723c62a3c38936d13db9e24f37052065dd354fd19c28e639b5a84c6b6389 +size 34730 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eca00de5fa3da000ee08e1fc16a891dc1f82092a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f4fd2c3aff1b2ff755a71fbbfbb54271c5a1cd4cd47a1e88497c5b3f888c76 +size 41722 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6fb95a381872f54529a0381bd94de796b85ce022 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d64894305c4ef599602ef918fb56baf194cfe678b7887aecedbdac3dc3c9753 +size 56902 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..536e15c7c18cf23b0721da589807e134082ccf40 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08ec0564abd2e08d104f888cffb3d666e59cdf6c23f75f5a7257ef3e72fd7830 +size 27665 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae030af7bfc87ec316238d4fe8bd3e570048baa6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f71d7d1013e809f4163799362ff7f5fe57e891929bd12e4920195fe3eab7cdd8 +size 14593 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bc855c803035dfd73cfa8b2d3d981de5162d5e6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c7c5c9875835b81d321fd4f27aa4e010988be70c624eb1ad3cf85950cab0bd +size 45625 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b54dcd2283b9fdcd73222a44d903264128c32ee9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91684de04c91e319a0ad92181c0d84b14005c57fb63ab6a4bdfa4e5e9d0c592c +size 53827 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6765792f1d35a6f5692c17c810b8b3829da42f2f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9cc4e6d18201434e277878293b764dea066ded0e58687db6f0ce5b6e94c8c87 +size 73834 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e26313f6ec8a2ea29e4aca69c8bafb77d3e9b10 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff06aac60f853bd97a5a97bca377b426054390b025e870843db8b63de24220dd +size 34903 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..118ea3661761c75e339dce7dfa3f9e48c005e18e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6146d86818f451b8d30d8028e992e6ee152fc945daabb23b03790514aa1dfa69 +size 39248 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a394e893dc3c39548b18593c61c2f557c0072f7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff42a7fa1c6257e448e69ba6a480fb89cec3d2f64f4b98bc7d0694f12f4908ce +size 77514 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_171/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cd068ac124d08645ff8bea4ee4e84713ec6d97c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8ac0a2dbaba440a2471d5352243a9daa6f12f898d8ca418287068dffe22567 +size 21009 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68ed6efd8457ba43f20c6a7636559c8e342b0a96 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32875cd9605335b2ffae5a57a9fa6c3b686651e411c163dae395f6f0a055a3d2 +size 17237 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28e03410a7ae391d834476b7e0f671091c66176b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b04eaad0719cc8dad843f78c83157f1f9ca4d2c3be9f31c6c401985e4a943ea7 +size 75699 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ded11412df42ccdcc063689a99582c04cbf6a21 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23184a7733c7a59998f8c77aa39be1a098d16c30ab5ef1ab702eb02eecc979f +size 27114 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..366e8ad70b28c6c92777c332ec00850233614e21 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ea56a10525d2ba83acae1d7c753e4a6372418cc6b8ab785bcd642dd49cb55b +size 44076 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4632136d9f723a9a30a539f7135b8ca394e64b8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eede70c6691dea01529babc184e775e3b279395b327167aebc5f66fc125196d9 +size 49788 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b4940a12c7e8e0c109c2c276543461d4d2b0acb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9443b9a584893dec39a0d9c046912d6ce3129365a06f96a0f1ccf78e1f66705 +size 63378 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..442d0a695092cff84d66f33fb081e99e72c5f723 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4365ed0553b020e5962e264792204e0e0ef3e7fb88394147fcf47206faf1af +size 82873 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31d83e759765eb626be32db98dd87905665c3fc8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a472338120e406ca83eee999bf58f8ad95aeeaf3472aa11db574943da4fce5a +size 36983 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a169f80d387d120f72ded85b2754b5787c64b52 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0eb5132d7b5cb5c1caabe2294912dc3f64a44cda41b58f930bf51b9fe2234a4 +size 49590 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2af2d872622442cfbf7af43aab43dbc5d3394f2b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c6d0fcb69919101a31a4fda9f23330245e6dd615ab6c74df757f0fcaab0df9b +size 67940 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..632c98c46b1da4b2a8b89bea6be1d86cee4ac90c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6d5d95863588354abb51565acb34d4ae3b591a38cb712cf2247a52f540decf +size 33735 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc9d0d5b4585584583af2ed7f71e2fa569d413dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a4f76bf9541b480b725d782919bbda972415574e7e94a9cf7def34eed21948 +size 35972 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38f0aef6e0c924427e4ed3d16ccbdb862a276447 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a9cf6b315ae2a6748563408e13ff174fdaf40f08e0c94d03d2ccf6b9308668f +size 34334 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..918d4a19c99189e2d923019319287dece93b5a11 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be39179d0bdc0a544ea24facf784add6070a9c340544cc9e9975f72153320fb +size 19280 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a620d1c7f81d68e9a1d7a36af69dbe77cd50bb70 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7574c5ed000ece47bae72b74c347372c6c54850c0c4e099c7f02c8546d1fe62c +size 20950 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fd9b1b6f1866af6d83e3eaa766aab384a4852dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd86a2180d12f65ea68a7a069694a7b0b6b180a9c11bd250341b8dd98250186c +size 13475 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5d21fd5d3a92b42ecbfaa2edd45791f33fa3e71 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d651505803d6348d8a81d729eeed4f48c37bf4937c15d4581abf99576d361e63 +size 35583 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10dc57b2b70d6a04460f6743a5aa54e705a16a6e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af11649434ad2c048b4691653c7776fd8a31e7e2fa4b4dbd732dad61ba95f21f +size 41924 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da3591c0f9d1076c6ca08166b961ce9f27b64b3a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3660cb69459dcbf1b06ca67a86c8b5406c2531335132addaed77d3b2c22b0a87 +size 57891 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e55df6fa5600e3660f4abf4092404d9183a9d404 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db4feee54876df3a204c64585f4ddc05b77ff73d1d146cf15f18f8a4c5262a8f +size 26392 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06514a5eadeea4a095f51705d1794e725dae567f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf7dc5096940b98ea230d28d37f3e7109a3c1a0cc9ee3191cda78500c6c7779 +size 14841 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41053bdd515247efe80376a82ae9a85015302451 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564fd173faa8038254159f2b392aa1e35e38fa09578ca236895f33fb00a788bf +size 45460 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5969342d3a800054796e8f9ed0c06f02eae36405 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606b880ed9d3cea9506303506a54d406eb355751e419cab51c72e019a3c16d24 +size 53265 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..681d78f603c267e0adb23d99790d33df915dfd45 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b565d06a7a9c6cec63bbd90a88b120e8c283f115d42bca096c1898c421f3d2f +size 73273 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3798196719500d2b56ea4f695e356fa0f1b5f2c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbf739301acc902f9ded7fc9db8f87ea9efb50fc4abc7d4e0c20d44929f645ec +size 34716 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11d8e70551a697e7f8b10cdf28bfc00205144fa9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b151cdfa3b0a75573ddb090f8815a844a78b8f3f145c44e64fc5d0f4decf7908 +size 39192 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff34e995d0f99ec3dbcfa0c513e51c6524088aac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1274a7ea54e7f32c2e66ebac5a6460fe53d54fd6874da97c9fb16012996c197 +size 77805 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_174/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d859069f2162061453fab5f5492fce5f16736b4a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:662385108d923964098007f0dd3d3484da7c0efdcd834230a216e937eb194ae8 +size 21012 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1d98f297f73097c339aebe430bd723d452db672 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5a3ad21681c457352c4784d9f8ef49af25dc8eb5327011737091a97f7c466ef +size 16962 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ecc901c57be1a95e6fdf8b365e026edc08e05d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e02169e4bf370c5a0d44dfbc0c17c3a2b92fd23ef2d9b1fa5f2388b747c7079 +size 75925 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..132cdd519488d05a8025c3019b2b1f9ad1e5ee9a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f23dbf86d52ef2446abb367d961a72fd0e1d6e426a0faf4911b0ae569026126e +size 26994 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0e2d01119c69f0b572e9029a4e8fb0a2f0a7e84 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09352f2d8ed7264f89a0a7859a19b01203fcfd65a333146b78a9739070446756 +size 44255 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93b8875296bf42ab61bcb14e0e8c45fd7eeac09c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7704dbd7fc521b1ec4facc504e3b1ed596bc5cc212debaad38bb1385a9c5d8 +size 54481 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7376e9bb54a24fd1c49c36349524eca035d33525 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f26f5e2b8387c4856eeb0b32814dffa39672748aa45b68ba42b92d936f11266 +size 64067 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a41a92c2e5cbe12eaa1954c33b2f44522de7e8f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bcdb0ceefc86c73939699a32d4afed79071d30a12321540a639df1e7155022 +size 78850 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7bc7412369b74134a6eb119cdd95bb1ee8ab138 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb864eb5241308108752d963ee807a7d782646192e49bbf9aa06243e454e2357 +size 37808 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22ce6432b5f6d376d089600ab5dac35c2476b1d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6f2ee4500a23f8ecf549f6fcb1a7514b02cfc81d28b4b9d13f6011f5e93eb0 +size 49763 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd00acd90107e2624a8f056201dad6d54ada2bff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb6dacef375954285921c69bae9f3fd8718cab464855d1843277ee527d1779f5 +size 67133 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48b25e8c19800cc444db5c34e9f4377a5902ab51 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb74bddcdeb2268412f123c1773ce091824e0fdfa5a20056820eb7e10847caca +size 33316 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb01504270f237e00413423e8d407d4fca6a9cc5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10bb1bd246cc0107081e73d358c56f2b5196f3c8c6cc5ae690cffb2b084818ab +size 36088 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82e328c8c48cacb656118f0b5dd4d259fdedb25c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ad86c2016318ba0d1de1b1a37e8a59571f3912cde59861a7f8435b861b8a94 +size 33485 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84225cb34ea2dc458773708ac0fe10fbc2242489 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7041b2e2cccf2ed3aaf3f9b56fe747a91b78f81c25bb1d4720e69f40c307636a +size 18724 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdac37c1586a2a4c299a144f622f954977b0acdb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e754d08335e8271b84baf180d75e1e43041e1dfd2b1c8b7948697d98293ebfca +size 20811 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e5dda38add327937082454f8bbb06e25f151803 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0d6ef1e85511b61fae0705c73e5bb8c354029d1200405db230b62506c5c715 +size 13372 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddc2e58ed96525e31c8bb6c279f2b5f358ec9585 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb307ad4929a5f68a22dad8ce757be405b26a87b5ed0afa264c799b378ee5bbe +size 34630 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91b3c47ffac214c54874926b02d7325d31f8b22f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e86dd8161c581e476eb87bdb4fbcf3f9633a7268e2995fe8b47151395a5de52 +size 42281 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2042d2fabf838a3f12222d937907c895b76b3d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed866bf6078ddcea10f6bf5f289229f8141241760770714812761455bb210f5 +size 60390 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ac81aff725ad8d31828436c681cc6e688766485 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9618926d4e8da805d89c2f37bc2665c857148b25ea13835f52eaf1a29add6fb +size 27699 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3f3c49d463c96b500df639f280df3a82dc0ff5b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bec9a9acccb9dab86287518e02e919ba1ad85b8bf4744f9b566b0b2deb93797 +size 14823 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53458dc08ad6f8c4b86c6f40ba5233721c64ffa0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129af019ec4d3cd19362d6ec44817636383f22b690cb5b25665e9c331cc22b5e +size 45476 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bff5e0e696f403341becabb677251a5ebc508f6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fbdc2d863e0eaa1cdcd98b89a008d5dc72d05d0f24c20554816bdfd56a90bbb +size 53237 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..528c31382df9226e47d008e4313a3354256f56cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49402274ee02e57ff606547ab9ccfc8724b8003b2c08c0b0b7fb17c7e7d73d34 +size 73843 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6c1f3cfbbf92da30f945dcb947109db48937099 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ac2684765e270daa4ae65f1d1c9da44f9cc787087c14c5563a778df5c8d1838 +size 34798 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00cd0a63b1e0265c2d933b2bf43efcbdf9de5749 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab77a9e1cb73d4a1f567d411b0f3da9a432857b66bec721a9f9dd3cb22f8494f +size 39221 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5551bdfe447a9904c6706b353cb17786c66ae5ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae66171927b074bbb0fced9029d2c009744a5ee38bf55ddf2e372f8349507d12 +size 78678 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_177/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb2a97dda259c9d0f5ad5873890a686c3c20b235 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3aa5ff64682e4ed0c2855ab9bdb397fa05a07089131a34f78565c8777f44cd2 +size 20980 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7781d7aef852f4c89d35f74a817be21c9eb27cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dee14ac9d9282ec0d72516a8655eddd723f524cb07f0e8034d51dbbcd3b331e +size 17239 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39cc0f7a06ac5844ff7208f3f2da3b41abd45173 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4c3c6f2ca386885958690999434c6899ddb515a5e9de1b90931b0ac962ca0a +size 74924 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..277f7ee57371b38c145100965a296be7af9a34e6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f98c8631a72ab6adda89401fc757052f1399b15103d42b53305a8f980cc7fad +size 27082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7a365b5ecc5894f5926561b32c5682d9e2243d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ad24d59a3059c5e7d338333c434fecbf76e69dbcdcf47551757cfdc40b85b2 +size 44064 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4deb8bf2724e930d6f72545d4f8e0b56e17abd9f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27ab70333434e7360070de6e56ee8cfec72bd082921da87fefe0855dba98753 +size 52788 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e33998756054fd9ebec1637b576e323439a373a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88101da9d9ef80ce7959b0839c61f547a5004e9e2b5ea661dc7cde6856bad3b1 +size 63281 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f55b06609ca49344ecfdb3535835969f6b965e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:422e5b0e2672658a59a69a49fd3a84b2a2a779fd0cbc7dce88092eca788cdb6f +size 77978 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f11da45dd19e61cacc1be8ab941615ca1dd24ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9f4cceecf08e3699b7aacc1ab209fdf6d6f2bdf7fd5680457cc387c5d72bfa +size 37375 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edb072e3daded2b746d5dc8ed3ae417f4adbac41 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e5d199417ccfe94b387529f0d6332918d1d407122833270927dd0f5ef16862 +size 48430 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b282530cc1ee46ffc1961047b7d7a1518a9365d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e666523527ce597bffd1167c6fdc05bbc7caf5668c0d6c0ffb4b8479fd5c35 +size 66580 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f2682c9fd027ef5c624fffbcd842eaa0a715189 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd2795473c5476565e934abaf598568deffa540631b2f6253fa0c25ce94cf69 +size 33362 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..991c8e6c2dd3d5c617a58fbdc5dc31232fee2083 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad99f9c36de923ca2ffe2492887598e771e70a8b542a0ee545acc5f4ab9ab7ed +size 35082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..396c4195353e484cd8fbd96c48b49884471d0e2c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b709aadfddf424bf4c8192cfd3831241b9f0fa78f3704cc800de90ab5ca0c0e +size 33738 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb401fdaaf938b25e39c360b1e95c9f9f7fc8e6a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e91b1f4d7ad3c0b58812c662565c9e6291f0bb5954db1a1d6c6bb99c79cae18 +size 18997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e7c2de3d748304aef8fd046982fe16394617ff5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cb1730f5dddded1fc6efa9b3a232a0753bdb5109546291126cce6af58d0daf4 +size 20846 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2dd97a922554759091cf849572bb0389fde18c3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc2fa83537d5edd51c8999b41a936a37d079fe1a3fc8da85706381b22a4b14d +size 13151 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b1e147d38770d363675a877a17a28cf434c4087 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57f1758b208977578aed1bc043d612282992972f0686c87ef660588c1d29c46 +size 34277 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..059dd4602542f4c2041f0f277eacab4d3ce4f1c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1557079616b25d12c5e63a5970c2588e42fca5ca6308cd125b1d06e3a9b70226 +size 42854 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1796fb9d124cc2d7c85a5cafb837dac61f121762 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63a5fef4f46546e05ccc898d3abc4c3eb9ff0a9928d7d53ff1cb8fa122580be +size 59345 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11f004cc5a916d01b97ab1f2337aaa92af7b65e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56da63f105aa6f335f7b14cebbafe43505d9080de83b025e53e65d95bd008d64 +size 26006 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f41afba2833a00f27e44638a4e1d2f28bccd922 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982d13dbd75fe82531d642e6bb475ba978b41f09392ab10bd2126cb0759ac77d +size 14921 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df3ec5996e0454b67d66120cdc37bf52af48617e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79a37ba9546f07c0ab114af4fe284f32ab492ee809d4cfef0b898e779a0d7a6 +size 45688 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdeef96d326e7799ed68474764bc804802920366 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f70303730d08491d6dc376bfea0c0f453860b8f0ce75c45d13c56ec76814e6 +size 53581 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16db80a5d955171a6f2588186dda80c263320b7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aea7933768967f1af72b978d434fd5e7c155f5afc9e61b39464c2f7e3ce95ab4 +size 73649 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..474036b8816717507315b2c0760bf84a66bceb8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88fdc86dcc3580767a121d6f464505d8d46cf791ffaf1de1ddf7c4ec67e30ea +size 34912 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e56ab3a10f4af5aa91ab2b8cd3889dc6128489e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0faedbd92fcf24ec8e84696ce12543d16ad454d80b80433f99069e5df7a6ab5b +size 39194 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..924216cc296dc1baf5591fc99c016f58b8bbea0d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e55ecb473c9972a39fefc575c202a0a4126e1db1a614b71575df73fa04cc71 +size 81475 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_180/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f79a04bad3fda9d2eee5ab530b028a73e95523ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9eac00295cc5d6eb77266967d80efd18a6adc0ae6d8b7c75cecf9bef2f687d +size 20984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..138990026ff12515f946ba0873f1d2772935a99c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d97ea007a64d0fa575805ce0b0fbc11393b0476e251ec9f3566ea82d907aac +size 17556 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f74a5376af7398d859f714153a66a148356f10ce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ee692152a45d7856c60b20a74f4f130987c1fd95dcb01e66c79989ea868391 +size 75711 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a708d715f1c437241bcf55eb295469ae0fd2995c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcba32ec798a07279475dcde0b1b0f16c912a7c8cf95069e0a050657dd07e0ef +size 27155 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf35ae76191b214fec3ddc37455bffc568edb08d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29bca2628b32c658d35024b6b1d1f8e7d2b8afb9306d5134fdc1d82167ba77b +size 44534 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f03c506978f317ab308059ef2fb82ed2b919bef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ecb59e46e4d76265186c27ff66c4a759a218c7d1eabcb06ee248ec834dcbec +size 47125 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f331127ac7143dae499b0e8c3f5b6cb5702da1a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45790f2f699bc3b8fb7a4404164f4424380a344e3faac24fcfaa9ff0e2c6199a +size 62740 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df580147b91103888c909b6c89974c913c22236e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38da1814954a913bd9f7fffc12ac4b4fb95139fcfe74f5ee9b8cc6c2a71589cf +size 79162 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2190f3b9ebd0abcd50b5ca58b1e22e1dedf5917b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:532251b4be6c4e1b2c8b4d04e292f34a4f9be94c712dc41969151851adc6e991 +size 37700 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40a4098c69dcc2234659db24fd63e119ace48977 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f865ef15430f74344a588006a58fa7ef13e54589e02d95262af4b02901b612 +size 47529 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eec6f5bddd5554ecfd171ad293ef0ecfde819ec3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6971dd0db16b5968260364016191e5981bb6beb0f5f70ea32c28d3111bf040 +size 65625 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55e84e381bc9bded484e0dfef0a97ebb1e3ab47b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b357206ced273d2be2f50cbf36232990e7d3e86fd89a6bc1cfd1f1a5770034df +size 33433 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f09d9320cbf674887d8af6c9ec0d3af665926aa3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d7a110d99468c6a81dc2d9111463f324f69553d46e2d9ed09cdc07c7682849c +size 35329 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..896d47a34988e238968fd16daf432dfda8e1539d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a855ff2f7e04e2d2a28711eca991bbf0e7cc36d5babdd4f356aee006d85672 +size 34380 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e25c4ec905c81cc16c1a20d5d0ea3501c1e9be4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd25e4d7cc48a45b48dcbe644e259b7a812680064553b2f3cc3906a17d76e48c +size 18747 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7aa116388c10a15561780d23840cd979804cb62b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267364b77673033be8e070f6ac65ea34d0fe4078c9646069e58f5f35bbae9726 +size 20874 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c5e99fab7955a0515b4e7bc861a8d471c5c960e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4be0cfd37a57ed594aa5de8f88bd70b6553095efb446083076cb7ce6c65122e +size 13432 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06d9d0b41d94fc4017b13cd32435ee2424d89d0b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dbf9e1d5b69c1f8503fa75890290a0a4d1f302fcb24d915aa54cd315b4d90b5 +size 34583 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..183ce57e4749b5dfc76860681cbf38d5569af661 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb82c2313d4334b6a0ccd613b96c9605d01399fbae4187886e9339b3324a5fb0 +size 41125 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e18eec85b610d94ea35c90e4ac48603f6ffd450 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54018166da3408adc4e669fcb3561e7a6f77a35bb0c8b25b9e242fb60bbbac4b +size 57175 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5e1938243a1490198d6ba88a775c3e772ff7ccc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e396ceddbf0d3d540aaba572a32669efe74ff1331747064af7b6664a1860b2b9 +size 27007 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05a64068acd8940ab90a25ab17f58a070ee9e9b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52f6ca79bbb1b8e3ff72647bcbcd2fa4583b8b3d121a289f3b50e04a129d41ff +size 14743 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e61b48a5d8e3c5d48058d34f904453ce663b5c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd5aa132cc1090db2058d8b8c56f98934019bc4782ae0b7d61826c6077487bc +size 45336 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c6ae6b62cd6e8a082577b6e60149f213bc0ac35 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627439b942b974b6bb138fbbd1655140bbcf7152aa51275f3db99194e43cecd7 +size 53387 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..753b1516ab85a789de22c62b22c7a94a59e2186c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3ce5516bfe96e3776c8f638daec9ba29a818b32fa1ca00cfd781553e05068f +size 73669 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..782418b32f80f0c9f121f23da076b5a6aa80895b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0babd514c7bde316dae34ef403e8202064e3c1d07515a9d272a3bb446d2e16d5 +size 34911 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5095ded6d97016649e204bc4ef44e2a00fb4a00 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4143888b4f384b9f64ef2236ba8f2db6210b98c35882bbfd7f924f4dbb1aa85 +size 39227 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3283ce54261ec2bf364b131d9be306d5698cfe79 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71442eb34b7d69ffc24a9a0ea0d150821e1de34c9b2eceb9604773369a01a212 +size 79778 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_183/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..426b9e6f315c112431e10e4a432d892c1a087d14 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7979ad5b9c045c682c769affae7562845c13ad0c5dc8bcc40d7b99eff454bd1b +size 20978 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b34a02696a21b19bc532acb8935931788368d9f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7b473f41ff93ff42ac1fadc32176a00884347b04d27d863c64f6dfa2f2a2f0 +size 17768 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..633f41c802fec413a3b4a66e7a3c91d61e929ecd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1078f5dad06fc312de47bd1156e32b596a7bdaea7a9a6f776a50f59476c186 +size 74821 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f8ee51629004e8ee9e777a6e8f6bc6994824f94 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf27dfdf7118f564c2e4367c18b64ba8c760de53074e00c23318dc28cc60ef8 +size 27111 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7269836d0b30f6163d79ac3f9168a9209f2f6cda --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb78b8a5cb6f6c40b7a2da7b5bdb4ce272e835bb551d3b17109bcc2453b8842 +size 44474 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c62f9924b6f1b4f72b5b197db87761aba2c326d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:071d1fb5a8e8dcad2c0722042e386d3efb0ead5baacc09ae94f826f73dd3975a +size 45623 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0653f96fcc50c816d420f8d02ef79ce784739feb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51da032dee58b0cc7629e614b14dd8d9701358562cd94612616a0e035cc6fe1 +size 63517 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d3a72536f23d7120d18f23f7371829a2d3b14b8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ba2f18b0460c0f0c33add319482dfe7bb3a5a109cf6a87982920b6d98ef942 +size 81447 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fff66593a51465eb3d5df5849a91eaec0ba7551 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af62ab3282816bd1515ff1e3c20bd13f82f85b44bbaf6edcac1c0e7d403fe002 +size 36983 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..355fd76b89df9521398649749fb8535381e05889 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77dca3ea2ab502e9e06733a88e16814e1dca03b5f5526ae8bdfb67080dd9f99d +size 49295 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f24da2dd563c91bdb315d836e276772cceb8581 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b882bca258e687e4b256812a50b763cde8518a765b5b173c38774727e46fe0ba +size 68434 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdc8aef53aee8e446cef02bd1f2bbc3a3a87de73 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa807194199b9c3a205edca1989714fbf20d84a6584caf6038aebb75ddda6518 +size 33322 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac8f1811392dae149e99a485b25fd8bb16770e63 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05597c2b376019423272ce64953a77d99444a5ff17da282af37348507c897af7 +size 35825 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66ed952062e9239a1eed40958f60b6840d50def0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4c7d608a66b2edb5b0c1218a9f728276055f011884d48c112b7358ca01fd939 +size 34204 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b8e8f91c976903bc97e032705ec729fccf32797 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8a6031eaf9856c90b6cbbac90319e9e8a779891f325f74a336935531a6c542 +size 18709 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e7cecca0f6300f96d6594b5f8f61db61f0936d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:146ec60b5da60e338451fc01aeeb6460a5ad01c5e4eac51f8a5408d0ed38374b +size 20876 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9fd5444ee00ba68715c1304b442d3b6068b56b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03861f24bf195e939a7efb90862940f7e0c95adf0df603ba213357443c4c69bc +size 13128 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a19d345ae9e0d3a656afd4adaecddcab4ce1125b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2edb6e68725cff5ffd96cfc3c8c625e79c24f612b15f658b86d46588f9951b60 +size 36252 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81beed0846352f1b30d20e9e3fb786fae088756b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78055c2d8d15eb12bba404813b1c2394217123aed8c25f83fb2406aa1052759 +size 42007 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..250a3c32196179638d0f98c86e1041a49a297ea5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db761a87d38626f1430b0ebea1d141d00487dba3cf7b3eabce23feadf7aaa6a8 +size 58389 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f641821b1fa809eb1470c73a50d2e1e348c8216 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9fc771cabe17b402fdacbb173057ca9c81995f46e5de82613ed908f9c084d5f +size 27474 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c32c0946051ba3fc488d6074aa38bdbcfa595f02 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c14a09697cc189f62dd61242e7e6a6d8cbaf05cf598ea59998409066d43d40cf +size 14698 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9206e7b41e3e21d6ec1afc5981bc0bfc2e23352 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250af08e4d787d619f01c3a8d191122ddfe0134c4748f4f53f4acb691e12d3aa +size 45232 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a94c38478614336b303d852cd919f4f6b43d41b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567e5e34b2df534fa851d8158f05112b8d80ce2499601caa68f9a081bb31c36b +size 54083 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a371d1bab808049ab3dd66b41737372683caf07 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84919ecc96157076b4634016dec8ed9b10a66dc8b5255a77d098d16f0e001ab5 +size 74109 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f625aa45f11bcebad56b3e1c94e833df1d904d5a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6cbb0e04977f3c76ac5570de9c7107da626b166be222b1b64265593aae8bb7 +size 35058 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7433372d4071e03bb3c7c5b625dad52dc1e64884 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92d97596510e4aff0d79a46af4b1aecda22e38e4bbb168ecca68ce2e9e4888f +size 39178 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85c360b58d2604724c99d918dc8a86d3603a731c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbd260138e5a8bf64f845ebf0c098c23d15f67ea8c00a598807dea9969462654 +size 78250 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_186/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e35b4af9abd87d55e4bf966091873b04fcf4f208 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f7bc8d443e9dbcfadea0d2fadbac11e8e3d961b1d87aad78c89acda977b5e3 +size 20985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e7ad9672b60d53709bb74f5b899dcd241465858 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7e0450675b5bdacac9121be9995726e517bc206d43ddf05e15015a7ae5efca +size 17150 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d12fe9efddb217355267928fdba4ff3a851fbca8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35878c14a5c5d2b1380067a5feb4669b68eaddbabe3757674b0f6289ea45ca6d +size 74908 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d51e0ee84eff47c39c72f6b25c099f2a853ffa4a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:453d726dffa4f8e6eb1e17e02a0031fd352041ac4d61c74557d11ee33926438f +size 26835 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15d316d6b97241ded3e52f04e222b74b5807e76a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee642a510cd04e39302e65d4e08c35684ef8616c9da8ba3d4fb56f544515b06 +size 41479 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bec0bbfa988571ee33520d42eef2a7278a4f434 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25844db114eb2842e7612db40e9e4cd4313c63bf6340b87546772b0cca7a23cd +size 48488 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0585ff24e2d8c795c6544c1483843ae8de06896 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7acfb457a98e109c6f1ab9f9a322ea0c70a123646b12e3174ac99909e84f4b71 +size 63960 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc493bfcbfb008efc1eff6d4c780268c61dc2d61 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17107583990716c205e8d4536c1eedd9728de0b1a2d31f48726f5b7d53083277 +size 79657 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b250ce702efd48adf2f9a9eb4a356e41bb1aaeec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2cd8b5f2e2205f3e4ee93796ded6d8f65d8f43ebe357304b1fc4e901473b956 +size 36479 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae87a26f2c1db250ab3df1ca34515cae61b308a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2417d8a3f70d78fa312879c7c1da316c8b0c340ef02648f65877659ec827fe12 +size 49030 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ba8509841b46a2baa462d73a8854c67b37e126d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c5fa3c301d0a0c00c5942868ae4b235ae37b96964c6d9f87ecc6365af244b6 +size 68294 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75442ebdc4aa47bcee261f0db382cd03353634f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2fa75fb02bdcdc5d319b6f6a1a828f6baa44c67b1f08de54d6f0201a9f0426 +size 33163 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a2ce5ea7f776df93a3eb300e7466783ee038e46 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc76c8ee6d4c464cd000f3a29874aa017b6022e1683ed40fe5d5fddc8bc7efb1 +size 36066 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce5263afa8725c4105ae2277ca19bbd12b2ff36c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a159fa301adc6c077bac54720ceac98184a0655cd6e0253750d9cf7126e33e92 +size 35607 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f3cad84b9e47960e3674a9836e56d04cd07e285 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1412a8e0edd2744b3d1c68b4e8eebbb5e9c181f25c1ce890d6a9332d295f4c8a +size 18886 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13db2d6eb27ebc75598652a9d40cdc96d722aa20 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffe7d6b70d40d6652feb27f6357a2e225c45fdc2da6f09ef6ac9c02a3d05b96 +size 20818 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e825aff235f348fdff1fdd1e1f4975ce0d2a76eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f5a2516ff6940d56a6ada8fd3500aa22427ac12556351abd025dd3716a37ab +size 12924 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90ba093a25ff52ced16bddc6a2f0c911eb2bd0c1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b16fa8e3cf5fdd008021625709cda555c0bca59dbcbb723b3a682f9d6231b5 +size 33960 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ac361fe7d3999d52ffb676fb8923d843f1ad4ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff615d171480fe02d8b881fef0079ff034688177d01907ca0b347e84209e947b +size 41376 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7622ecbf01965188888373882b52e7210e3eb8b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aef887dc0f020a6156e1b04088e3e694030ef5ebd3cf5c92c1794c5cc2d6efe +size 59530 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44171042d12d58c5968b8339757dc8bf35aea89b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbaea4eb8697b3d46a9f3f87fdccabd59739f770383310b57c449c70c68361f6 +size 26096 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f9a802413ea6900ba948160c270a6c2dff8b31e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:097ebbf16e3a1c564635ad2930bace0ab83ba083ef423e9e860638ffff9689e2 +size 14800 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98e1cd77f4d37b254fe159a4a681b1a8ba1b493e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:810bbcb2920f8024a78869df02907f767520a8d40ff9511905b2180e8b4f9d70 +size 45180 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab60a3d949d991595b7fd968aaea45ea5f10dec3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a91d222716713c6b5ffbd98a1d6b34d09a6fe74b432f7bad4ca7918d7a1f6c91 +size 53909 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37384817929fdc1d325dc75ba347482f23deb32f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b0024bb7ac3add8e5eb41ba3c85f91f475d4d223836d86cd820cb1b9c658d1c +size 74119 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4db852f8c3681e86e30191629791b78caa8442bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd61062e21a93a50d55ae947c94cb14c7cdfa5f56646697c3e5291053cbe048 +size 34816 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7d705a1a7e49b60ca325f4ebfc08e730cddaf39 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be39ec93a4f61ffc1c1b0e8bec253ce08d3202de92ca2a288da9e3167e5b780 +size 39134 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cf3a48c1dda4484afdda091a18ee93facdf1691 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c03785d2028305c1247355d3933cf1633be101c8a37e27fbb2cca72bdac067 +size 80267 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_189/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..820edc79b4deb3692074b47bdb5e8c4a1f2dee49 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0866af37776f92ebb0d2e3cbc762834e5c064b61261b5fc299462d367b443117 +size 21001 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4905736248d85d3f2412c8b2d20249a9305b895e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be8265ae746b358821ed2e6d9f2eae3c8201883942d923f599c44420dcfbbe01 +size 17245 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0b159e51b745d65bf93121e34f2c2ab2989dd5a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1afd78fdb91393ddedceeef1be285d1e65edb4b247e6ff488937928ff19d453d +size 75020 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..053d021e81f84d34511455cf75c8083b8823d1b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a194fbcd1a3c19af709f769e2e197be148953297a3731ef2150dc519533bb74d +size 26920 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7874007853ffc91bc309cc925367edc0a277e4a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af0afc6a1c8a38c60d68739c0b2d829875ce8b0988da1359cc931e9bed1d103 +size 44529 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47b217c15e4b1ac56fc5a1fe268b50caf3359f7d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c706b427b2fd06c2825dbb2d8d1b264be8c68d75a3b2949bde7a9070c8726114 +size 45106 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5245a22d30fdf1dab3bc2020e5bde98401c79b5e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50dd968749dc530f12a0dae9151447f78b5bd2fc1a46e837473dd6140d036f9 +size 63430 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1947a1fb3720727d5852b62d192c1765c1f8f294 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eca8e226277219765b6b13ea656effe2be83b8761c476de7bd05a9622d6bb6f1 +size 74018 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c8435e249f172f40b71da753a6313ad77035b8c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30a853107cfa1776d4f5fb8dc9737607c60f02dec7264081361e70594942bb6 +size 36603 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f45216cdabb5afed355b1904911e7a4a297d7f0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712aea07a5391c6ad6e41865b1b99ba6bc1e55fda975549cc102213006d90eab +size 49364 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0765cd5395c4c4591b288db14aa601eaa80cc58 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3bfedc2adc36f837db715e0d7fe345eb0682281f9997c9f2ff641acb96c3ed5 +size 65519 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01255e2f9630e237d5ba5a4b9d1b689596119b24 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75adbae62390803276552c6cb4e6b2f5bcfde3f3fa30237cb27aeb70ae70f304 +size 33153 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..315e7faba530f2462838a812b955e8e43db1eb31 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c3038f14a7b631f745f2fe0440fb1731bcc414dfc3c67bcf6116445468671b +size 35953 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bedfb4fa320e60b421cd89f79a60ec4b276cc208 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22e017c183ef32f4c69b744737c9559026e023258d2419c328c7968c0817873 +size 35107 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fac014c4dd24e96748169cd8cbd3efc34f20bad8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eae73cf6f20fca577ebdede4072bd52ac70e138a68390d1719dbe1fc45649dc +size 18806 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aef86a8f0b7ae061d84ff22c14b51916fdf7c4c6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:697ab1805a978f107e7ca2574c62ed1bbe0ffb1c581b0bebe88cee3785dd579d +size 20909 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc90226dcc3670f86a83cdd14e179426ee088e5a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f64d8e67bafc4088b1dc829d6876ec61a5d478754fcc5574770859fc8658ad0 +size 12614 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc1c789a245f19e91a2c3e8519c6e92d4673e2ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d6ec82ec2294b307836481cd713c7f3272d8020dd7b184811006fd00caff42 +size 34344 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe3f5e990b3509a809f1b6c5fc2a7ba19c64d88b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e9fd91a3354ad118dde34ca458bb7049a36b19fbc7769aa7f73118d5e8bf43 +size 41716 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6bd11fc1e700d89b5c960c364a7d2319e80348c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9066d4e363ae23643a8d02c08b5cd20d30a414b8d7985dbe78fda8ef6b7f08 +size 60273 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..694c2046426700d9626965467599bc75d67b9068 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e3a8c5197d96ae50f2a113c8152935d81b42d50a232d10e020a4c99c6aa285 +size 26437 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45e0f291f77b1d4242e5a3ba141f6704af750f4f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d023012bae03accaba428f8e4b7ef7406659006f407cc3f11c04159caba18c62 +size 14773 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1fb6b83be2ad668b3e18bad866b53587198bdc4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414d124bf1da36f5ab8f84c27900db8cc59a35eae1ca7914365ededd6361c1fc +size 45038 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea048d27f8cb566bd8517dac0bafef1758dc3ef7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4491f6179a0bdb037ebd3e5ed18dd052ec609ef51f618964d50f2b3d2d8d366e +size 54118 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ebbd7959379025808a5b3e0468c24541a372edf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97a9276e3863ae5d40dd3e3ee508fb7987a00dc73a8949981947d0a3d137298c +size 74404 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9b67e709e4de02575dace9cd7799c122b5ccebb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79cbe381c53ce097dfacbb9dd6da55e80514100b9096d2b3ab07e7bb41e50969 +size 34742 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b92a690ac6d0f23033b54a4f2846c8a18314eeed --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:667de4f22127b44b49b1658729ec633f4695e88b848bd7ee38620f98dfd69094 +size 39239 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e57056201ef1888424c8b221b15baf8fe1340e24 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0451cec67e661209ae1651137bd732e2a4f9332b67c628a2f03e14eea7c0e38 +size 81772 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_192/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ea86eb75d5cffedd4719307254faf9cedf58b72 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5b50873e2a6228a60af3412fad744498471835e74930dd30c52a8cd2e18eca +size 20992 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00bdb7118ce38dd2be9c010d0abeb7795feee2be --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8fd0c72a8923ee00843d872feb48e0970de63ac649da6a29b16ea0f1ac7289 +size 16548 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2499061498c403a076e353585b665b10321d0eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e258e0389d4cf9faf95267eab7fc2f79bf247d4efb65140c8c12334dc7e47dcd +size 74177 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a20355327786b617f416eb1decdd4e0d9404d48f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a5beaaf818394bbd04145277844050397c00c803bb0f239b4d66d5bd10618c +size 27009 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cc43e638565fc79092ee62dbc744309d8ab863f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb1a1ba7ff82a7a777c0ce2c212513d467ee96f7edce4f46fe22db2814713584 +size 44658 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee44a6b4a9af9678e4d8bcc69904a83429f97f15 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efcd5fc31f05686a0e232dfd3b5fd8d7b3a3eff796f771593cfe8822178f2fd6 +size 45489 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f22e7482c28b16cfd1010511ffbc4769943f461 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:453cf5cc6cf334d0c78c71d753b24d3a8103d6455b5d89530fce8f02aa00cae7 +size 63761 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2af315aa45cb9dc307868eeaa4a4a00aa6fbd974 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:507f8c85c125ae46c3abfe54f09856346e6a9d618f69a598293e2f0fd29f4292 +size 77484 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bd94718f2903725db073b069d9566f30372bc8c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc72ed82d21e19b8ff9b474c5fe231dbfb68c197bd6594d46be114a4e3293e5 +size 36103 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbab51d7609e4395c7baae29be47e3c522a22919 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e88646c7bc3c6321ee2214eb67cc16ba84964d0c7b376e4d710fd5e69d9fdd0 +size 47726 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fb6d5f49a7fcb1dac489957b460b82a6e376a5f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddf90b277cdd232908af47a3bed2875a9dac3eb2a14e6259c41a93fc78d982e +size 67894 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e1d4e3ee610603a19c6bd0fa3a3e47835d8716d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1d727a1bef724e4a1e49cf6f63fc78027fa44e5f6e291d74db1cdc4afe2bc4 +size 33125 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c76961014d1bf7d1f1fdb5ace9e770e60d3f5c87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529c81d9e009743077cc6509bab55632a3615985293458551b4a4dffb9e58463 +size 35303 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7fe8fe2536206a91cd8f3eafec378b4e91d807b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0fbcaaddf5fbb59e63587ce3f6df619ed78560495d433e679493c1391f026f +size 31593 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf3c8df6687b11ff420274804a7e5137453fcd8c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5271d9b0f4af2ad4b62cf8fd2956666ba28c79174d2277052c1afc5144454734 +size 19641 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e70ca245968612d3d1bef8c61c90ebefb3452fc3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fe72b580ab5e45d95a2b4d646d621f3e08496e8744c0bacc3d40666d6895f8 +size 20865 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40cdab541d24adaecd8b69a2fe6e93e534489298 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:337b11091803ba317cd28fc768fc0bac0d9242778997c41748ca777541577105 +size 13263 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..310122d1ad9fdce5ec06924f99856738d41d2349 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373b0bac1c813d25a1e20ff7f4d70b94d383be4ab8d4944ffa76a0b2d1bc08f0 +size 35707 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5daa23b4323c9b753e55e1ebdef7e5b710f2545d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55df016386c81bda62a2de4c4164b302c21d36c3853e7550c53e6db0b2933308 +size 42732 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89621dea05a5a947c42f6f03ec147021b30d9d17 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6161ecdd7c75560591e28519d3bb2f837975da47af279f4e6e6a29bdb708088a +size 60538 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87e67bea4c5ba91b9b80b019b3732c65b62ea2cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa0e927c0a7b66c4da1e831cce8670e41fd4ef24a196c9af97ee5937f045752 +size 27510 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc13e4a51ea770cdbc47708d4477b368b17b8c8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0904e0c60476104a4acc818fb1f9365845068a0f9da975f6490727d5f6d24c60 +size 14791 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d213335e707ec762b44a877fb1d9406c88619ed7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6336d2024565ce081ae63d0dd7f4137b2568b1662f37c21e2a41460bad173d1f +size 45072 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9411dd671ef4394c9b32a1a975d25539531b2245 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54ce1dbb5e25b33b9b3f97995926d51109f7346eb8dd673e60af5651761a1cd +size 53973 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1f7a0f2b856e46a618c8f9582b69a553c32ba08 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b2be193c023a4965809d486a28933b0704e011692033953f56b5e7f6c944a6 +size 74046 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a52d0207f8643ec1e7506628038479543b75f60 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3a4f1b2521f8e3a5f0eb0f6d320feb139269c83d215c4b8fe0cedc1c902d66 +size 34846 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d3655efaed009c900b8a6905d46319f19b65dd6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4063baee7f6036a909a7ee368539f3e29be03d14cc394d21ca2ef95c262c7654 +size 39155 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0acb00299b8308fa0f2e7dda577fa2230165bda8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ccdf9790f72a107fca39470656d9a0f1753ea7576046617078c57e21f096e2 +size 80114 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_195/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b584af2bd766eee7a3671df71604b53228c278ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67da088342c0f84a58dfb0e8cb01921d14e07f4c2ad39a089b4a26a0ec24ac6 +size 20985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fb68b027ff7839315ae17b0354aefff8e8a7227 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31cc1ad06229a25c65f826c0c3cf820afc3d427aaddacd5b08770d33e009a6b5 +size 16561 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd1b47012ae00b711337b779b041d8782cf4d1c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59d3a26c2ce8efdca0a8d982c584361424a9e684401e39ffc4c182ef1765e80 +size 75113 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83a48dd38778be9bfd9672d311c65942b30fd90f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a713555be2c10a6642a9bc8280ed4ab380bfdbff98c07d48dec15c8bc1c7ebc +size 27464 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68a73e945350abc73a9c64f4ebc00af29ae86abc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5597a18d34d70311d550602ec7be270396b86e24ca7cf76ccb96e67a83303f +size 44408 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44ec18cf75b9d90269a0f80725e45e31d1ce0f44 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67a06b6e672ae0df8c65a0f2f48de30a097779abda35b9f186f761e5f1496b9 +size 47070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0816905cbf4fe7cf6c2c3df6679600babea5ac81 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4859106496463c3837d16a8b5d042a2f8e34cced6fffc2306b4de5eadf3ac5f +size 64595 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2e8bd2cac95ea7556764a1cd1a24039ca03440b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038d0ff9203a05e978d51448f88b37276a7ac9055540a09bac18f33bf14bfb6e +size 80586 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47df1dc2aa2a491b07e8e962d116eb907bc8de34 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71cff3d1ff6c1cde23c3836358fa81efd2a59aa738362776a4eae06623fdaa90 +size 37014 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc32949d3ce192536b73599e2763b4c822d938ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c4b648441ac06f9c7776bd99d80970fc4447f5bccd6d99c234415c0320876c +size 48637 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1e22c1b00fc295cac94c7ef7f4739436811ae96 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a9e578cdea25c470e2ac8a5d98a2d193a96b2cb82d4355eef999a234739a612 +size 69158 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cc7b2019fcb11930947f92e8e5ed04bd2ebdfe7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f27eb2217bab150a27842d3d0575e21952a8db9aa8036b969e3c65b34117f57 +size 33170 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e10a19da0d389829ddce13ae3f89cef104689a26 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469f600cd795da61800bc941e71ae4cce8daae1430110389be421ecf49458fef +size 35722 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34fec4db14cd93b2eb40a44cd4689f410260eaa3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd78ba85168715a6e34b09a1e5cb74ee85b7962d7b0646f939939897360e119e +size 34214 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ff60f1748e1f14a4174c5601d75d5f0c97130bf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b5297b6acc9e3f170f7c30a62d4e3d3f722ed7f0b69f431ce2bfb7cef96d23 +size 19561 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6314fee62ab7c2565d4bcee609ceb6aaa3e83e5f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d2100214605f8cfd1ea3601b54c9ca9e2b5b6fcfe928f2dff28644f85675b9 +size 20842 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85b0a5a4ee314a9abc09b5f0aa505fd8e53e4078 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80012ba3d1166f58a37725d98558c8768d05338662a286a83d3d6f919eae3af +size 12628 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f90a223941b1373bd6f619602ea5a508c56096e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a34e1d3376afd0572fe522d820925fb86e9bb07c1abb314f4e315aad560906 +size 34452 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b47144be26f028ff42e0cb60435e486fe6b466f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a83dc182a1d6715141056b869c393421675649d9188edac12a4f50ec924bba +size 42812 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56c2a65d6f51c18dab243027bc659594d17f48e6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9991a7a4a4a753b4af0df55a5ef4d86ce8ea4751762f6ef21b38d3fe096ed09e +size 58114 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b071a34f73efec99a56128915dd1c73abed3dac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d3138318f97dd48c546278901b235a555ff4d9729ff5bd07e22e0aca06f044 +size 27730 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cea602d1e43d8fb0a237a17c3dd2a8cc0f9830e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac1b669d0028a9c43d188e4b8eec842cc9498aaaf1704f696bdb0d09f320f15d +size 14795 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8de454872e54dba378b991616c2c0354f24b35c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de767c24174948503ac99d47f83a598a8a21ed8ac1bb518b4b7ed1eb3c3a342 +size 45181 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71ff6379c6f5e9c1abfb0093f935b30e29ecc7f7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc2a655cfbfb042979d35dbc41803383fff0185daaab53a769cd91b39795796e +size 53920 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a46479f86acb65fa5ebfd7aab4a94987e967966 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd5dd303aacab3742d2dc6435041632936cedfff01d24c7497f5bbee5ee8a066 +size 74064 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..789b078f061e3952f8b040f24133e63fd92c6e37 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4398bf96d656e601d45d07953c798cdd5e95f062e39e189a02af9b4f4975090d +size 34972 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e093d956da5d78af054b6e6a3b35c860bfdd9ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f549d02b4abf32760ef5d9e530fe403276e91795148853071420f4d279b49e +size 39217 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4001036c93cb7a789aba05d42e4bf79056d136e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a149e6ba7e6515da14bcfcbf9503597247b6cc32d34f992387408cb3896ba362 +size 84056 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_198/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3935742ac3a6f3c127e419f6230b378bea666ff2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db19eeda5c8b2df6c5683f650c4773541fc0d0a80c431d2054b2c3bf8a2c9cba +size 20980 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3f757ae0c65392e6b6428850c2d863a3d79eaaf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d1c611f70d8b2410d0bca327935f60ae4716d93ea3745fbe9ce6f9ca35cbfce +size 16918 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcf906d10713c09434364a6a449b00e6053c1af1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197ff365eee42c82992523db46a64a50bc1c6cd00cc84a8e9ddef51c440317fe +size 75745 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..794f171d00d77b3683dc13da17971519b9fc2071 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1033426f2a622f64a3963175955e908e4c6ed1f45fa6433cf11c3ecd49529d +size 27026 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..518aa09856084b9056f8377ff4deae6c201e7bda --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55bbc66223a643a8d80679790812bbf48da92fab2c42ffda41d055270517ced2 +size 42443 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31bd44edc5c74b3bd8abd96e0ae95c87a85dd246 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a80d6c5dd8ae36d14c0d50abd501b42949b925a3d7ead99ae63c14b63c7194c +size 47435 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f39c8c1bf2378fd5907404faec2c086d1c2b612 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c1eef0792d0ecdaf531a6b13bcdf58efc77c4e2ce16521a533eb7f21ed58f6 +size 63231 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4169b3407e43dc5401a86e258486ddcb767039ea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c91236dc1dee0e3957c0a61eb3b15af90c9a67681f2c6f64cbf034e5e5696cf5 +size 82225 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de79be17f87710a737fcb4e8beeb86e6ce2b28b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e62f8028e2d2c1553faa2cd4759215114d1298ad532666a7c6539313fadf5b11 +size 36467 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a302eba0a5da1231f593bde2d974783bf3216f01 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff5e27589cd242b3120ac6c0f7b81fc90a6cf372952187057e062c369edf088f +size 49291 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2f32a251db312788835daf3b5ba78f854c5a616 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5afeeeb3c20f5621be4960131b5e1d4a8f16e9492b227c0e2fc34f30c8f48788 +size 72761 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c795d84a546c91896c79c54b4e53c5425f65275 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237557828682663f95eb87a0522428d8e4d13f44f630467b6234be8c91099465 +size 33138 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e762a5aba44513c23f6d5a4352401bedc3a479b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:053e40a5737aa8838159515b800408970839978bac877127b9a729bb4e6eab36 +size 35778 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d10c1421db14fafc16eb182c1e2da8a2c2ce2e04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:075dfcc7ee73a2159540428332daabb01c934c52a4ad68337b4ae9a920c9d7c1 +size 33935 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b25e64aad74aee42c25c3898bf0646766955263a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c221df23192715887fdcd098dcdb72dd0ed9530a9682b2a5f6122bcf4e19112 +size 19086 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26405af1ff6a0a74858071b3a9d9f1ef55463f09 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57dbcb611578a09707a91570364689c8d842d297bb587f2c8874e930e22796c +size 20879 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b48c28b3e5d77e6b2a2fad21939a0e5e71979d09 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5257251fe93948b484dc1ba25490fdb2ea1da6b6d3b46761e78a8e165b059a +size 13336 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73cfc7a1abce7b1009eabfbb687c9db52cfbd77c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d3382d7d55f1e27d2dc2e7694157c8be2d92613114328125861e8b0ef44ab5 +size 34717 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0cb7f9ac6199eaa88ddf10d8c954c07565f7277 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cc2b205831f36e880c140c4cf8bea64a4973b20d4d357e1bcff22dbc8ad81cc +size 41484 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35f5e32da15f4e7ba8f4560ce939f02d75ac3c1c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b9066b1b119b7dc82e5e31780b04efd2903964fef8a91aa13180533ea8ef7f6 +size 58878 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..268d58920f45dffb6840f441ea0e111bded30844 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa476547261fae90f8bf0a43a7ae98de7d8bdadce4647942095be576618be592 +size 27254 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11f0cd4def5c9d516eeff42222c060aafff66704 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a39ab303459272e22466fc4f30d3cd4e8ad0f6429575a0d5d6724bd66ad7e37 +size 14800 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef778f6b94fa4481dde0589091542a947a09cac1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80c1ed9d6e19f0336072619578a222c94fb74d7e1bf29e2b7fde43a08696ab9c +size 45296 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cf679a5f444034c7f1094a8e15855bc846fa84e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb481ee8d43138c02245e97d821ec58c5c1773843349d70098439b3f6765e93c +size 54292 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f99a53ab19058b3ad97ce47e189a361d168aeb7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64fac95b6b9cfe815fe39bc0362143d77dd33f6682a1a0be600c947bc637b21 +size 74686 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8152cf8b532627dc7d3ad6ffff590fdc762ebcf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a49a89285f2cb62dc01fa31743f2a2eb383ceb95793f3fd5ccce49ebaa1479cb +size 34821 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..097d5fd1d29657295bb248570657ff0f35d70aa1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c3c18b38cc1bd9a6ff400b11bae2307b8c3c2a7ac1d7c99274c119deb8611a7 +size 39175 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b8f846db16e45b967a43be5d7e8a2d62ed24fb8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0aba35d79802f062a1e8ac4f8373cf00514dfb2add2299199847880a7bb1feb +size 81217 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_201/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6c6d2e8a2a8614873bfc040162a86c93b016b3f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b74e606ef41c8cd1e8842bdeddb3ca266ac5f7f9b7403380fe550d1ad0d1f8 +size 20975 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31e66e5a474b125613312d0398f3ca4194129a66 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c28a4bd6fcff54a2548209584ad6ab7e4224ba22605c2abf8c31c0c7b9e96d90 +size 16476 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67395bccb265df6a0936c2332d3130ed8c2ca9cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cc44b2b66815808c8a5b07392078b49be478ad445c08a41c687e53cad137f60 +size 75694 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf91796a71aecdafdabc3c7ef9368a6a123d311e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b335d73aacbcc28b5ec6e58dc6bfcba394f0eac3a0771c1151b0f5875f233a32 +size 27235 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e23bae7cb2f0246f6d56f0e739d5b7cd5416339 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0157e447366a830b89f6ddcd7e7529cc559d3d250099c130a83f50a9e0e6d4a1 +size 43199 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b4a7820f782548f26a21c3c99d8265bdc7a425b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe578e5d42003b2c5f2df00efd13167fb123969d8b6d2b4782fbc73b6eb7135 +size 51224 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0eb4dba6c2b3e49d68a11030383a656df340ba92 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a241474367c5c29573280a284a921609344c3318ebf330d586c72a02837c2d +size 63763 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57664688e6b98061f866d0876202aff967d59931 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b76b003fdc4b557c71673866e52801b7bbc3d0e4d69a1936681dd936f3999e00 +size 75137 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b30511a28f0e141afdba5ba64b30c7a4ec86921 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6022929473c78d0d652b3838d55112d5eb1c1ee40f4cf8c8934991821a31a63 +size 36771 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc8fda907493991763ce4e78011f37761db5d5d8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef770e15f1e3d262b951cb3fc8e94a57cc791d42b8bca831297455ee6531cf5 +size 47787 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..209a9c987b035bc07387eb62081268c9807ca2e0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06dda4d24206aabedacf053a9d9329e1664fefd4851d3035ea41f163738a7e62 +size 66433 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..702fb344a1dede300126efb44fa66daec5b385f5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f163566b2bbf4bca9df1d691864371b8ef7ab084c5f1cb1ae99f2b67272f07cb +size 33120 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a24a814422706f8080b369ec8f4b3e750d6d71e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56658bbcafca053ccc9b1bdd7e61ce5bc497c3ee14c77f6738c1f461db666233 +size 35991 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..551e2021765f976a6a58e2f21a77962b52893754 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7a2b5d7a92002ba0faecf6e19ecff455d3b7eb4e23369aae9c32af0008577e +size 33854 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad8f53f2b83ef78d74200598e23c2f49bc642a28 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d07ec36203167e4717455961ea4484241e06442d5e4d8a77112e5e4a0f2d92a +size 19054 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e27282cd9393b1192120d20eb507c6d9e840125b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2810737909662f61346fd2b5340eeac32da5c93ab6b2e52548cd0344b838f7 +size 20899 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b890e9777cd32987d58559102fb25c2f493267f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508fbed851ed5440d215315b2752891c61e89b0a4bc5f419b81520b14d294ddb +size 13389 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f7ed167717854cbb6de5d9b00ff58e5174774c1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec134ff14a62ee0d096b473ccbe27354a4e511d421bc556807a9b1e86b6c61e +size 33544 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a51f33b76a8d583c5710d4015b1e6a3baa2bb47 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab124f1127040b83e7c54d436596f6264a6fd2b709c0ac3867f52a6e1932cd0 +size 41693 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce4fa62b6250f78cbd84262250bd950d9932f607 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:034f178b34b04340a74b611c2cab4105f54e4eaf62f65747fa0dfb65c0a5c46e +size 58136 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66f458279777a1445ff391dfa80ecbf592f9ee72 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:991eeb5ce6c74bbae4c6e3943b8406c3bc396452074bf44395490b004f794387 +size 27616 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6223829445976e82ec1a3a5b9b584b626c04ed15 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b1506317b460bc974be602daa6ecd2dc2ac8063d922c8a86f87ccd5c33790d +size 14809 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5cf88d3d8f78bdd26af3d62b602cb2d41eb4d60 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc8e29b7a24dc2392bf618658e943097b931b418dbd8b70fe81ab2eea663c63 +size 45157 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a592409487daa3c695eb5b532dc75088ac0f436c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f89677e347d0bfbfc65b8347b5cb54440cab7905891dfbebb09ac656aa627d +size 54195 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c79dfe4307cb54b6bbf2aea56a2071d8c5ae1b3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce855834d755d83b11ea97fbe9c19b2502af78ea13dd68b71464c9dfabdb0675 +size 74410 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c58a3bcc04d064c19db20e82c048dc87c8a592b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54db4ff660aae051d08d826018515eed79c53c76746e0d70f4dd0934af1bccbc +size 34938 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d49cf33d9cf7227884bf5ab5c13fd5696da5abce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2d16ae095112d809b4dd569f97e83321f705e16f4bd43df8c2c323dcc006ee +size 39179 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c2f54fe3e2d06e171beef7e5f5dbcb10334196f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032afa71800b7d0f233c2dbaa1405c82946261c73e77b354dcb3515aed608b74 +size 82859 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_204/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73c7622d9a9c03c17436efc31fce822276eefd81 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bd8f2934cd7160ec900e6e8401d31e64e07e9f95dda8d760ccbb4654d3de50 +size 21000 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f223a4ee7af1ff1b737b37412eaa4094a5f28a1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021f4dc702479547a280a6b3571cc53912c85f49decbfc5537400f95e86be0e9 +size 17243 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b62d9fc27601deb1997c99f06c6ad3c36696d0d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98b78caf5226bb1f5c97ccf64c5403345d0d8c3d9c36b6db80c8891cb311350 +size 75790 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35e5a68febd6f68115688f81a3ba0576040bdedf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4887236e9d9d05cb27eaac8be6873d1f350913afc31cfb5aada7d4a7973e7e09 +size 27673 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6faac9c7a9695d03eb6802b6992749947693dda6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ce8efb17737e341b5b1d1efdd221af08e43978f003b0c8a03887ecc50fb1dd +size 42255 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c3d1a92d5e89df969874328b6c5afbc3bc777c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af8e316c382ef50e2b21155154edae28850d7c32ebb82c80d40bce1f075f4ca +size 54185 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c70d0f4970871293b1a26a2c9db9c051d6bc432c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7edec1b51565e7dfd25f9d067f7c33d04765a67fba0b60cd834e8510b28ab975 +size 63651 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ee74946adb8bffab1eea76be9e9b77f99b3191c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3997d03a65bfa576d302e3654e4c4237bf636ff4876abb226a80f82874a6b60 +size 81383 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e35babd9913cce0e1e1f9de2409f2807c857aec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc74d30a42acdb1c686ba911f54941b29dd0e686ba37875ec1746aaa7197277 +size 36777 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfc328bfaba469d29ee97aafef90422d6356b415 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc4f292e8e1c7c1f93954614f52876ead749164bf2824ebdc672c55e1bbd5c04 +size 47720 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aabbcf1fcf0f0eae255532638fa38643f3b8c110 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40615ee30172b950ae408c1e358d111d2fee99bb4abd0bcbf9c242600171213 +size 66357 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bec4772467207494d0f50d1d48de2ecd310fac41 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f4e5cb5a224e29d37b1fd1fee94450b9400746626a337c810784406187aceec +size 33030 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e74f67a13c0b98bb245ce0638400a82c5a768b91 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f6f8168501d38df45a14c20cd61b87792e753617553a2bbfe70fc0943e3a0b +size 36240 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d310457500bfc0f76fca83a027b8acb8b877cf3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60245bb53cbd1936aa88ba0b959a9cf9c6bb2928e057cdaf01e42567d56b2cc6 +size 33058 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..493268b221c9f121ef85287fd70b0b9bd09749e7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a217d10f083e24ef4b362d5c3e43b46099b6ceaa52471c6be84a6468b6a6ac67 +size 19265 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75b5dad0f71f92b0e1eed4149ccc09252db6d356 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de87c885b4d902142f5094f9d505f81f5576e9cd9fc1842917353c68f5d81d01 +size 20841 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..882d7983af3595f890403d7aabca7ecf80ca619b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0c948cd1e089a1a348145b6ac3e21637efa74e9c0d32917ba816cbedae0980 +size 12902 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75389bf572adf97a04fd7cbb01719ed3e64a018c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c50a23e4dd843c669af617d4616bd354fbe70dc7bcc690424cad55f4c6e7f0 +size 34519 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e654119ca41636eab35c63c4cc385226274860a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9c8b0f684a01ec13701608306e277558db8f13cdb4666c38d7386b6cd630bbc +size 42159 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea84e22aa5c67f5039e04b338a57b60b1fa618f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fedfbed02cdcbec30ce83c06fe3b8f196656ed18ce1dced1cb5c2c1888377f47 +size 59646 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9caeb0b8554dcabf53d3ec485152a58d7b6281f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d432e7c6a6533db43b991d0f27a1e00b96494045a301ced390e20d023d725ec2 +size 27227 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0429d6c61445636819343713031c6a38505be87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40d91ebf33fe068d6ac4caf574cd2a70bfbecd98da865351558ff638fc4a1dc +size 14869 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a1a20ac467d399002298d376c916fd4a7cf2f1b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb533d45f271c44e94160bad8a64241fc501f38c8730730547669ae20724fe7 +size 45050 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31a50fe90aae26a15859319b1f9d25530b194c87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7abedbda48ea15ed49d172bbbf30cb64d765fa6f1ecbd314780a42cf54c38632 +size 53775 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b434ef80979e43d6925d29e85e81ccb0c81ecfc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd311eaff815cbfdba2c5f1693cdc5420d109deba35ad89891292874113ed67 +size 74107 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63848021f40a13a48199825ac21e0e990d95d48f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3169c24bf46e75f0b54f6e4b36d1cc1cd7ae42ddd2722cdb5b73c6fb7176853 +size 34862 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..057f644d7ac365be9480e810709ccc1c00b96d3c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157bbd34be151c6edf0e0b3dc6d4d1c48790f846a32e3f92a69e6e191c5834b5 +size 39267 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a109db467e1905b4c40c1b684d5fac4a07d3e75e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:617cc4338582a40db785a4f7f5c1dacf70670ccb4183597c06b60f722787af33 +size 82181 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_207/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..141090ed2e21d7f855219f131ddcb0089b58f3d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae04043565b89bb999db9bd928f393250218955ec9753c599e016d070d04b92e +size 20965 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67097572f23f913245c74043f2ef1add374b48d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f42ad1612d781d88f851d77db81878fabed6aa4262d3664a9b4085791e186ca +size 16320 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7b9cf0912ceb8da434bc40af2c30799d6eec6fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e4e037aabb62bf5f392583232dd5f8694a33be2f76a020a7cf08ff81861d67c +size 75055 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5baac782a3a3ebba16d39c50b92b9a8863298cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb3d247ef336b049c80270973bb28767f6d22dc22d98319789b9d28bb83b4f2 +size 27009 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce3721dad9fa25d617f4a85ec23287ef9e92b652 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f14c6f563130ed5f151400952c5df92fd1d64e43535ded43c8f792784e3590 +size 43882 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c8dec5515236397ec3494c8fd3822950be64b68 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6bbb7e9a0554e5f0072008b98286f67b99ff578264ff133055c812d7d2561ce +size 47919 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c95e2884b6a7c67996392d7164f67d60aae451c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd19f454184f04f98cbfbc313b084ef2d251d1f1adbad9e5d18a78591feb65b +size 63285 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7affead326218e07af9145c737ec95a18c31feb5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6796ecfbc98a5b4a97856c6070186b607cd1d90dd4bde697ab6a710da47198 +size 83721 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f2fcc7b6d858a2a0477794aeb9cacbf59e7652d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5b7586dc3d86c34d2158b0ad5662949207794a158be141b3d559beed0cd9a9 +size 37141 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a9b90d16ae5e6029d44f2b07144d627560837a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d26b50eff28975907152f72b45b6168a6372944987148abc5df53c03287dd0a +size 49714 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68dbc2fa70af9b44c3e530cc42a826889cd07d9e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:344b387c6a2bdc09a92b6c2d5797c145eb9d73100e744ae27f1bbfb1b0a5f1ca +size 64730 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66340e28c091434a5f3e139895610190baa62b1a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5fd8c45683693aafd877942473ada8c6e9ea540c3eec6398d99e054bc0ab710 +size 33230 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34ad8f44c3fde09475ca27cb0ce7d0c9085ec31f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3cfbb99a5b3978e519abb74f899f2ffa3b3951906b4c466a80fef0b64488bd0 +size 34950 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f5646028b5b1849cb937f6fe7ceb908164360ce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b03f62e03ba6c8f7016f184adb1aacf8ec3a988bbf5cd184b8a8883ca00e28 +size 35140 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..261de85128970636188cb126d0bf423ecb2570fe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6758fb8b36dae86e3d4e982e9aa35045302faeeb8d63c88ed5282b9b1fd92e9 +size 19175 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a20f7da6c89679ce5aca29fdf09ff8b7162b166a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ee7f7e70357a30b8ccc68517cabc268bf80f212451df5c769216dd9dae558a +size 20867 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cddeffa115e992de4d2ea49187b5c4e723e1e251 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee756b423ad9cfce3c30c5535f98baf37ca9d57557988b4fa8a85599b179ea59 +size 12678 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fb3cd895638bf4f79a40eec3709f9fadc2fb1bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3de38ea4b49cb03c06cc63024f99352a5eecfffbdfe822610f4fdadb2968e30d +size 34111 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ab455d89215431e0f090245e45b6673805460f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ec3cc7a369ee71469233567fe916b9d8eb22e79ebd7b880fe793fcb55614b0 +size 42062 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48e40a452e69d5d92ac00024779a15aa6941d50a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eab1a8f87c8a1c6cbed64a6f61547e71cc625c8fcc9df04d2d6669153518a90 +size 57020 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ecc04d63143a8de881b8f75ff36730bda9fd93e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4f1ae19d641ae0f0c689c6977dffda7624568bdf24018e30dc6edd52994cbf +size 27262 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..401d5adf0fc8afce1f1a3d911a0464cfe84bbdb5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85fad8ffeb2ea13937f3ed79040122ac019e87140bb4370e0d2ca9fbcf41553c +size 14859 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e441d636a7d199e1023b9eb14254b2db935ed7bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eca20f4109b1b6d8e48b93564fe89b56d6ff9cff780a3a3cc09429b5a6a6584 +size 45312 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc8458780cebb8eca1a92c5f4133c5fe38eff3b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1432ff339163cd27218dd2c3856d1082fb4f1e7789b1c7365ffc3c6b9be17909 +size 54000 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..106b1dc62fc07db8a7d2d159c5e88bf0974cb957 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e2e7bc4517cc8493e90ce445b04ce639cc305cbe82df5100a08660a3b34006 +size 74415 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcb5c48ff9708a35569b7403124b1633eee1e71a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55236e95d857d0d976d48075cd5e6e16ad50e3f8054068d4028dc77a32f486bd +size 34996 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ccd911abfcd86bedc9c71b3cfbabc8e749c281e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539c52ba6d8f13894a38771dd83591421c42956c965a7f9464985db9fbffeca5 +size 39146 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a8e73c1bc73442b6c6fbdc4eb6e5b9f8138311a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:554d2d52b4967e6d2cf431dac93b95a3faccb293eb4edae2dfd458cbc8c1f8e5 +size 79499 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_210/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b1c911fa6d8efb37450eb71bd97471fc077ebde --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47764447d1e9e9c612066f7b474ab0382273ada6b7bbc4b6a265617991d76f7f +size 21001 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02a6ab5cad4c3f3cae74914f2ec14bde164c3176 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:234a3f06e5ba7fc2c2ecb8ca2f2e29752050430af0f583d6a5ac4e66c7721344 +size 16344 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72c01be775c20cbd8b56e2fd8cf4b5958bfd4005 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:515832a9e32fac82f02b1bc4f44550d6923b7099dd414b3a9d6e7901a499da01 +size 75354 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef6e1af21691018c44a397477760e0e964093cbd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0703d29e3a9368e22a25a64400c98cec67b88feeb3246fd1305e975d545d5db +size 27087 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..666bd4e6fe1fd6e3fb3035f5d852cbf52dbb54a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5e6ba55de6f7bcf171b870c1c3a4a58514a1dab123343bd0f6e3ce755552a5 +size 44987 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76875314c2b32799ed3f9e76b0f872c221f864de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea17d1979369ff40c9f63af10de35de3c110214d98a5a21202b92a3f7da3b81 +size 42492 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d6bc7e867a6596fbc803c9797e657da9f7f0b33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615f94fc7e08452f6035c022e88c4a032399ff9a6d1cb387830159efc4c28d1f +size 63177 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..912fc7d5ec5b91b19e18bdba96d889fac443fe46 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75654b33de2f0445fc39f0471bbaa5b6f7742712273bf19d460eac4ba8d8d24 +size 77318 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0815a8a1b879e44e4ed84c05d768d91c7e480b2b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559f3f4db183e2366ee5fc1bd39646f8dde8b7193e1a4458cfccf14bd4498574 +size 37056 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a7f844f122ddce4ede9bbb5ceff56c0d7ece466 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d655dee708b4d2faa210425dc799a7ab3c2f35abb805fe603262cff2412555f +size 47366 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f62d71018452b7132be20f52f921bda0faa8cf2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e6f66ba6e8724f0bd8d480aee3be7ca951999b19b400a22f7ddef66356a64d +size 63803 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..362c378b22282383e45f607e2652016f9a56202b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d165ff16ac6541d08b2f787a3480cb47af47d4e0f981e03279d6a1834026af +size 33246 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71aab8e0ad4c453a58d470fb09e252a0e280d2a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb012fd7be9dcf774648d39d0cbcf105f6f2ffeb3666a9f22ac80fa80d1c2fb +size 34901 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0687850a31148159e3ac21af02ee7dd24c5f37a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbba325dfc13597dbb1ea2d4069ae6ec0af8d5a82f96c8124eb079bff3c6eb1b +size 33747 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e50056dcc7086743eeab3c8d8a61855d2aa6cb02 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8e89836624357502bac3eabfadf6a6ff5c8a8709b8aeca78787f3a38348dda +size 19050 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4073059f86aad07b4270ab3aa984af741cce3ea3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9d5b95758d47dd9e231f8dda823fb3726ee4fff86ada63f7e276710d499cce0 +size 20878 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ced37c7945daff0a4de9a41516e8453b6e9f3c8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e4667c2785a8c507ad88feea2207e8fb0869ec2d7fe298ae7720b457e08be4 +size 12939 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1280d9eec65959d52e1ea4785f32b9cb44f24de1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631f1ea223a8639838bca7bd2658c37f238e9723c3dbc836d65c0cdb980cc993 +size 35032 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a86a3aac79841bdd38eca628340f4680c2e347e5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7247d8816f9d780b2ed974a68c656ab0fb38ee5d4aac35607f77a434ebb91a19 +size 42112 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bee67808339dd7c6c93f296feb1ea78853e2422 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8bcdad0b64733b3dbcc7fe29d1c218f4e3ea38c6ff9fdafdc2d76f597d67a33 +size 57830 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d55a802571f9bf98f3ac3a33fb7d6ea8131b9cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62006d2a9d7b2b4bcdd1e137bbcb290cbfe9585f2c0796e25bbdb303cf3d207b +size 27772 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a233c5cb9a9fe82adab4030481ac997b40eb81a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d4a146fa5a354d06cfc64cd71a64d5e2938f914db75450851b7283ea628aca +size 14786 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02542f1dc9f729066d453932ae1bf3661ab7eee3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:367965ef262aef16c287aaca5811e146ca3578993c57b6b3bd1fa9507720677c +size 45356 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..797de2ce1873802c95ed9419db48d60b82f0f03d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3c1bdbd598d06b926f49608c3640dcb838f3b05ab24f44ddd3951e411a39d2 +size 53885 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cea63ed57d26000c06b8b880c26f55735e8f563a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a56783f49489cfa10c566e734b588ec11717215df83fd498e58651670f2520 +size 73998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b3bed909159bad89ce21133dff188d9a113345e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:036eae241849e7f30e213869b03243a76b1adab0e4bf538ede92a38b8b4b98c6 +size 34944 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b789b24cc5b2d8a8af941504afd3d7612b233057 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5592605a536d234788ea6df93bc408b6cf52c18b6030818ababee2688746697f +size 39166 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea0948e69648011623b9e21295146a539c64d702 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef05452137f6a0dc994b070be65d48c5e0fadf1709db19d0e1203f1597249d94 +size 69914 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_213/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6928cba16967eaf485fd27b293d80f3e30574a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a0ecb4e37f22e79de048fcfd8e385e8547018c891bb36acf54b918c691ea5c +size 20986 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b04d094676e5dde6727478b87c925ba4b4327ba4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d934ab7b10cf4d150ba43647f5723809060957215706509e34d5f6daf1cb4736 +size 16706 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4b4a2f221d9a4af3bc1d85198010d3d28044cfb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac8544cd6f9006c91c0cb2652d7d4596b91de20ad42da5a874139354c878a27 +size 75478 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3c7ef3737a062ba6baf8b482326e6bc773a7705 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6564f62f519f151db7c9be2b1c7584b61de41bb0fc4fc0c8e4a2d3520a1c4ca +size 26790 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79598231abc6224b0ce763aaa942ae38b2a20d3e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4638c37c96f0a3f4e33dbd2c37ca933a9156129e7bfb1d47343da6b9346ce2a7 +size 44137 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..218837f8fd4beb19ea19803ab9c81dc01a0f147e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b346654bf792cff65c6f16188412aa6b32ef63d0c0d8e257d3e137e1f07d36d0 +size 45522 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..560f7d38a543d5e13cfb4d4ea4525a1275fe8011 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f330ea7efe2d90e2d1f5c67c60afa58efb8db2129312082c134ace666326960 +size 63619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2729675da916c26ccf8fff10d2b3559d63fc94b2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9d689669e1186697862f1b829ee3751a5ebf6fe5fdb7009ff82e1d3b7e05c9 +size 77589 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20ae073661a36c352cc61c7ea956ee36ee352232 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9efeb898e2ced188600be232a037119d157cfae74fbcbebc40600f661ec0a49 +size 36954 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c4165fe12e6419f44948a647c4f4db0fa52c696 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee904bd7a6ae17332d29335ce190c656534840f36677b7dd53b202fd7cf06af4 +size 49029 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..388bde8f7de695a749e586e6361ba3cc95b2ea78 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7ea0f05539778e8aaa53165df04c8201656eb4654f1460257e671f583c046f +size 70634 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d7fe896e9f63bd4e1d52ed71576ec06850d824a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee20fe5bca5ca13b01000e69c13d6c51307ebd862a8cdde5f4f46088db50147c +size 33270 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2b3b7e5292d69c6239ff979d1c6ca2161d80cf4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b135a5cda63dcbcb4fd8014946ae3b8d94926389902a2e08d30636a04a33d3 +size 35659 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46af9823f7de4fac5502f37af1f6b57f75d2eb0e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b0ff0fd108e43020aa0687f797de699328c603bb009419aaae47b5732245924 +size 33040 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d200b0dfe0345e32575fcd4765f86f8fcee40068 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:587e03de66b886c706d1a949daad75ff75e678b138696e4095a7362555414f99 +size 19424 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5d5b6ce7042954e6e1dabdbee2a5a027cf4c10a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:201b5f64e5a13b5d6f7599d3836c2840a68cb2fff3e078cbc9da7683f972f837 +size 20930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..137414fb437ebb468adbcebfc0b967318ff9e00a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4548010a40bc6a7fcf9fe07852087e33a0e09cbdf32c16fbdb6e37e1d6f0a515 +size 12917 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95967df7eb9b6e7d42876c398e10aec0dfa5733f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b47212c86ee8054defec01400c766cf0d4928beaa4074af12e79ac5114b35c +size 33637 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbcf54321ed5dfff78ce16c14f908559cefbac05 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e01b6dacf0869abeff33af5a0d89404620785c7f5baca700978ee204082852 +size 41590 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0075079d421bec69f9249fea82dad32f61cbe497 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3850c75fb877d7898a1b5aaf7e8f6b0442f06c5479a6aed6b6e01c05b3e288fa +size 58706 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18a477fec69b8492f725e8714002c505b40897bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4fd0220a0430c26acf26da4daf549b5db2475fb77f4381945d2b3541b01b49 +size 27706 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a69708cab946fc2d0e4fdfdc6367c716c6ac7a81 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a827bfe2573990211dbc60b4a9c4f46dd35f8304c4f452cd1e2e37bf92cb59b3 +size 14687 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45b5056ef6cfd05b43715ae8d39ddab5a4e82d3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d168dd4008aa56de3f457c94f942616f50666c5fac77c6a010bc3eae61aad95 +size 45330 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9664ad975c25e7844d1d67031274970d422720dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c1c2685243fc3113ec585ad4475f4d412ddd204615aca37f9330ad67476df58 +size 54070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7995963fad1efe82fef026422cf95c5daa96b6b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d955b4e12f731e334cd3ae4014176e7c8522c1f3727adec92910ef24e4a446 +size 74200 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..505e2f1e469ac0f92b70d424def70403db31ebb5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a203ca2139394a3cae2dc020ddfa7d5421b0ef6aa6cc3de1eef6718ae40efb16 +size 34805 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39e1a2f93f2e523e1b61414e3590478176cc13dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:701b81f5de52409bd42c4ddac60bdccf2ff7f27e55eeee4dc48f40ae3b257e7e +size 39175 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01a32f8c78ca84c7990da3a2a82f8cd012b626a9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ffc33de7ac896638d61b229a237d9d62ad7f7c079b37af549fd60ccd70258f +size 81165 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_216/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27fb19e2e7ac128d550c882141b23f5ae83fab60 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893593aad882b828e2049a0397ba773aebb48c862f476d1202eac0ea86a9e835 +size 20981 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34e6bbae7356ee9cae558573d370ae2e37050a0b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e58a723974fce25e7fafc642cbd2928e5a1c583969a2cb6e82ab411351a333 +size 16129 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4427f6d5686e8a7657917f62e018600964d24e37 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539ec97c2122ecbd639a83d213815c8f568ed2cd3f672eeed9597e2da5910f35 +size 74873 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9dae2d7de8857b435da2e8fabfafceef3e086717 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6d20157ba6b2547cd94ae9e5dc979edf3a0c7cd7c7be99b4b2905edd089dfa +size 26901 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ffe542df1530b4f37a263e3c6fb6e7166001fba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a47931eade8c2cc69a6f813f651506718a5abd47eab61258ed4f365f8df599 +size 42427 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc52a835f80334e76e51e5a4e9fa0b9770bb0b9a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:829ac58a5c9a7a4c61d7632415d433c85494cd6b9bea1fbf693321ec605154d0 +size 48102 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaed48e6339ecd5390b58733ad9b119478b8c20c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8339d20de203f16133bd8bbca08c47c10b93eae90e79c6d990ebb83cb160528d +size 63686 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca56b2fa12889dd8a2957b930434b9ed4ccb75e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00908a0b61c52163d96a7fb8c586716fd92b0e6427af77d3238ece688bcf9f4 +size 81582 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ef6fa186079deda1bd8ae9f89a9cb6111356764 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8280377b802f6103557d9104b6dd1be6107ea7b25ef719121c632dc3aa6e1f60 +size 36574 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff129aac2bae0d5b64f388497044793a14795fa6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b615d612193dbeffe6831352043e75e7d7337cf8aff38b3cbb828ab093a558d3 +size 47847 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2548486933fe08834092ce42e7b7b8d414526dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dbe3699ecff4b296a38897476d7ad6c5adf5cb6e4a9bfd9e89aae96c396cb18 +size 65295 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a272b08e0a0f1efda1948c7db1e6dce01845fb7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff012e608f2fe05818dab71b21d2e84cf8bce29610306e8204bde165a5bd021 +size 33170 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90ff5b2cb80b77abdacf308849d75d9062596b20 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b67e75acf45373a7d49e3ba6dae173147e1cc1c41450bab4c86f02f010ca2ee +size 34861 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70d599aff8c066d0d057af3eaa8c236e096a9eba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545b1f8361d3937b276bde4cbcdc12153bdcab7837b89c54d299b7aea47df2f3 +size 33466 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..156e4edbd81bca3b2a330ce917235fde3490e6bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63e6edaf97698fc39c8cb26024cc630a63fd75ed7f53b6721eac94fbf8dc926 +size 19105 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d3f32e857eb3d9468b895447dafa57bce1c15c4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03c85f8f47b418dba56df1db964fecdf0f71aedf82101c834e252b96e9501ca1 +size 20824 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4931b9ca570b0dab8a01559470c43e48e8243227 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2771f1a1f7243a72ae03e989cf2c72dcdf45cf012b9cdf2d760a71d822b195c +size 13228 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..497f2eb0c574e3de6eff64a0bca61ab6339cd7c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc64c35bd397fb3ec6c50c030b20f66e4014b95ceb5d2ebd4fed217f6ee844f4 +size 34164 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..848cbfb62e180f24cf3fc78c2323567dde11dded --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcea4395bb2e4c26fc579c67c7677a553b1e1d929900ff4e5f31ab86e59d6b8e +size 42192 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b38c621c5839be1d6c0292edf18d0d6d8442725 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a9818e51ed335f27fd033850c40905926577eb36dddb134269a01607eea414d +size 58957 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d1ec3fe3cd2ad151b2d2d0a311fbe46b6814df0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:036081a5c9c6f0c31db548c78b3aeb3683761e9300d4bc9614b1baeb3060b33e +size 27513 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09dc02cfab288d047e957ea2db6e5ccca6489804 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f0f20b546a811c260d7d9abf3b932b701d6534d9e81bc8ebde0f1b983a2cb6 +size 14804 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40496bc0a9e91d10ba3752f199140ee9a19d6cde --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b76ab3e079f7121844f05fd78e850058ab4c9b813fee41628a9472aa1987d81 +size 45101 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b37b2c93495413d8cca9322e97292db1732e53ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc608a85ac1dbba64a4df4d5a5af0bdc48b90b0797219f5bf0dbcb11cb5301b +size 54255 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4310f6605c4941139f32a1c7fef2b0d3bafc8fe2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f00818edbf028e7f9334a546202477c4fe3159b55e310c3dcfd00fa4543e60a4 +size 74343 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8a722affc1c6dc9cc0f8e448eeeed20ec5a70a3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5741636b3bb29091fecfe3d6fd192e15ddb0dfc957b17ed11ab834af779326 +size 34871 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86067b79fef49b348e57a1cda80f061163316d2f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f24cda7fbd24cd816dfba3bea623adc5f0c93a8884a9acac4aa92a5e5dcddd +size 39176 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f77d8c09677cf9027b5265b2f696e34183affee7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b8c5ce2a26d063617b4f0cfcf192885bc7f3228ef697a4609412fcc38dd89f +size 82980 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_219/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a398882d254ddd48d58ef368d71d5fc8cd4a3e68 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6674e58fbba90a61892438b2788ee04b1443969b911a40bb31e3195aa054f935 +size 20975 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c8b46a068214ba0cfc0c63ce72af6831db14c8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ac81390d90b97460a870e7b955862afacdc6a4972f3757b6dab738478f1f79 +size 15603 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..493ef160d382b64007cfbbc2c54e1f09b6aae483 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ea25cd3e13727727ddb3180c2c3d8c5b09bb783161f7cbac5b4c6d9e15e65b +size 75046 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..254b3ac95a7bb7e8e45643f41d7b367ccd876882 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8561d30e3d1a705f84ddcddde7340411db1c59bf1c48d237c14db512e745b3c +size 27066 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a7654a3563bc8e9d795e20ba74c3a27430a4e6a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4836aa4eddb4e12508029c54816f733344520007f503765bfdda8f01a739f491 +size 44225 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf328baca4843995fd8a002054af50360c318689 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e5bfda8a2d595e49a1b4ba86522d9807eefad25443bf347a5cba6917dc81244 +size 45425 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb11cfb329f434f13d68bbd9689510df218e0b8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b71cdc346219c5d78bf316c4a032092fa29fbaad074706698341299d750715 +size 62891 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b658a33c3f1d8a50cdce5e8a277b6974c6709a41 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac898e7a09831644f3ab3fa8d65c58c4b66cd71d153c7d15fefe0fff0049c72e +size 81674 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33e84f444e861ef1255d7dc467f98366c5465705 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fec41b9bbce1c628ac9f14da65de52ae68017e22a8003b7d17f149ec5ec4e72 +size 36253 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ee454522404d84dbee43b8234e6314502e0d51e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfbd9588b79ab9c5bc55dd25219e4ef7c7d170f4c681d2b06d43758c5d29fa8 +size 47291 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d8b02a2d5b769e6eba22340a8f9c93555ed1b59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00253b5ee1d00f27caa67c186f92548575dce218810a1ec7e033bfd2d63fc7ca +size 64300 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..068427d2face965ed63ec9ada6bd478b89a13a28 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611e053cdea4a4cfc4679f3da6532e078233f5db72468a4b6278ebc05caab070 +size 33252 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f889fee84057bd5ba13b04484b909b6a5ff5a390 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61182bf579c58f86399595dd8310d5c7264776cbe6416bc13465ac2bfbb4bcd0 +size 35563 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea26c6643c2a584a324946448d5f9cda0ef3f528 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73eee1eff3b93475d81d1365557696ef34eea427ba4793c66f2461461974d9d +size 33753 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b004cfc43a85e03d1f6f378f372d11653486f844 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254337fa848ad19832505fd536ee8eb529290adf85a5a1629a9d51e0ea47feb6 +size 19100 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91e50852ffd2d6a1abf9570b37fe0fb134ef1c54 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9b8c7ea00d7444f9c1f8b30af945a7d0591df47f35de419f0c4b1ba7628dcf +size 20819 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea81d4b7facf02f05dfe229325bcd49b1c9daee2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5aaad76cea9960dafbf0f2f47c49ffeef9460a5c4b255f03f4922a47e9b470 +size 13462 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ff8f9bac3ed8e622ceebbfd6d0c310441aae16a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243e0b27f882350379cc53133879dbe6c8c3bb9fa488bd97964765a05fc740ee +size 32768 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9258fb0c99937994b92d77f66ffeb10a5c33f7bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaf11e9125352de282eaad2052413861c2529fbdbc65c81fc11bcc2dda26b2da +size 42071 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20c2f22e4679a821ec66e29d81aacbf33a7b6240 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456159252cdd62de3421d33314d14d58bde957c6767001d2fceb638a8b0c0f87 +size 59076 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2626bae15a334999fbd6dbaddd68f3ec95afa068 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b193316516ef6522404bb4d7268ff36f89851bc3ec945aa67eceee65ea4198b8 +size 27294 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ce83adfd010a6c44b98ae0f157d545b366bf733 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89d4f9040052eaba32312e3e7cd72cf2899ca06643c8c5e2dd0c87c9e027072 +size 14791 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40db785dffe607307831d513e95b57d7a399c042 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65101eb0aecc96471f0195f870f8bcdc5340db006a47ee4272a8b6f55b9d6e13 +size 45173 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46ec81bb95989df5c4e2c5f077bb1e9dcfe29217 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ccc8aa6881d76f54352123427f7a16fa4ecd3e2375ca17316117a13dbc2464a +size 54033 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d4e41abf22384b8dfb56b9b405012d9e19ad91b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aac3fa00d3d3e8d432d4456dc41a7c1d64a5522753557969c0eaa0bea542fa9 +size 73945 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5964f9d1f9a85cbe94c5f9f5bbf8280e6eefd586 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32d589a0f74553d8b94bb8dddaad30f5a3ee2265fa8261ec134dc063b1259cc +size 34984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85f88eb259633b7ab41b806bc67063f820161f0c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5dd16ece03410f208a0f883f0384fa532b1cd8315a165f0a022ea158709b9a4 +size 39134 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7989538365ac5572818ec96d9a4ead53452bb52d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a7fff02f88e998dd58914a44a29d91cabee74cf13e0f40bfe6d6887596cda6 +size 81720 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_222/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52601a704fc0d956631424f9e4d7b37a9c844692 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1631f34ef1f1815da45123bf27265728b47013cdc8fca1cd140e7855b0e2ed71 +size 20966 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6dc0f21f76e6cdfc00f7c58779c9b8cb8a9242f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9edb8fd3987e229b3d87714d2a6ae847759c62dacebf53a1978278638bfebae9 +size 17480 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17b7f60bf8a37950265c1b839aa7d080eeca3455 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:825134066734fc1746c74a03ae1299eb8029af0f59417fb0eccb18be7ae75c73 +size 74828 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d17da055b9860bd28e01f4ada53e36b655c5103 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5757a7d9584140b68795840377a8281c29cb4f03c4960d88d026010e38c1994 +size 27208 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..184d2daa1193154af60908b099f4745f9c8d3a89 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150c4fa7739b141e98c8323fa86303a0190a8bd7230e42e73c7d9ffabcc05f92 +size 43221 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b86b33db80f0f8646ae6821620be16800ec7629b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cddb2cea03a4c5fb9e6f2761327b6fbb7b013b4e5c1d79d0dfac34530ad90b32 +size 43342 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b18f92e34e11073331c4e444055b7ab3e8f60fe9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3621acd302465322841edcb393c8770d20167472822ac2f1cfb347702f952183 +size 63131 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3108840fa838e47ea89325c44b1bfec4240e8682 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6149afe16d71f997e5c612ab4f7d1c69e389fe49d2be2e75d4cf7e690cf6849f +size 84895 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb1b89fee5ece66935f71979233d0b7ebad01b49 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb571cfbc12fd99c1bb18de3d6bc20668f5629d808aa0fc1e6b714c469dfa8cc +size 36609 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4eb3b47cb15f7ff60ce22034a0ffef68bd672e17 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e44dda8afaad58fa708e4cb98152e0ed43541d75d8d8679106fb4c7bb5cb8b9 +size 48616 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f37beeae92db27910e3784513eb93e3e16163ea3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b9e1d9b8e25d15afe6cb4853d020ebc67e40627e811e87b7ac8115293196e3 +size 67359 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b8145735edf4980f792a498e76b4c0af71ae6c6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20772af25cbc49313828b644b105503e967ced38d26d9a9ff6163bb7eed05c13 +size 33105 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95411e08a6ab103ba8944f2aa635f798cfd71031 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9571f721a9900ae37b327e0120eea14570a084416f364a76a370f75daf6b33 +size 35025 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d27d58f9a74971a0fd5c4efd4e78906e11a848a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d80a8caf4d5699f18f12e0ab48f64548992a46fa18fd2dfb00865e1c09abfd1 +size 34124 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c44f0aa50cff7bbff9ee3447e999212adfe052f4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86bbbc85cdaeacf5e84b67e3544ad5821972b7447e515da2ce1c94ddf125680c +size 19186 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..356bda588a558a0f55a11e0754337b4c55e01c8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd6c63fd9ab2b4eca0acfeedb8a84aa856d5e287225931097ee38f80db257c8 +size 20877 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f99ffcd6d60cbe861be0e09f3cc0a36b22d4766d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6eabf438b2279cad67e16af4854b0e2a7b3b618a37a81188aae4dd79f58278 +size 13051 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab44dbc1887c249df9dd594b4200afe8a62a149e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f632c0ff4e85d91d94df53a2086f7f9f647528297b3fe80d657ed6bfce4848be +size 34030 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12624a7e9fad36be835cb995859df9476c5ee64f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3bd4ac59bbb2c8e574156cecb52fb81cdad9d2965fdd40be835d730bc73cd7 +size 41709 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9809739d7c2e785f3f051e478331c40a55e35c6d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ef06e51ac2dd1e84d208684c0612436cf76c37aae6e066602c0413070c2521 +size 58308 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0c12fe3577beef5952374b4e443437c0a0b86a9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9af36e5f19cb15bb5d0e52779625b39890b414e4c3910f82d3174a54cf84e9a6 +size 27292 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d6c7444f786971a16ee87efaf2fc83139f2b0a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819a931cf8b1892af0e91cfd5049d8cd36fd5173c303820da15278b882f15e07 +size 14759 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a033496cc30fe863b176db8a53ac6bdc3ccb675e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8902ea3909af4522fc2bf533f63cc86360a6ceddb5f98879a28754621d2b20a +size 45325 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a3e6d987c50753d1865966f164b65ce233ca1af --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f8c3f1b0e432fb5447cf5f8890ed8ee208ff69dabab8b6a80531758732b0694 +size 54038 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..942c96033a6e00cd30e15b5dd30beaf9962d0cb4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:143c612bdd075333232162d13061adf837c383a032ed822858268cc530c814a7 +size 74195 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18e9f677bbc17f7f0ed11e5fe5d73cbd4b5c2b00 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aaed3a39f5bb1fc8a162ee7168a98d2aaa168ebe6636807e6805b558d606298 +size 34953 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bbc14321517ba40fc0e0e92755c1d152fc260ba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2438beee405f7ce547484ec9393a88b90ca3703d1b29f66ac9fff61021bf78f1 +size 39193 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4c09a0cce7ff9c15d90cec10310b2a99832010f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d2b3ebb1a826e94a67d5085c651d4f549b9242bb58522892ef7dc8e1958ccd +size 80273 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_225/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e8424ca596ae42a0460ee6b17efe471221db461 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f93031896de61cda654495ba514d0f27c6f08825e10e22f4d7d7d0e5d019b0e +size 20985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b73f1bc0f74b7a47ae2bee44ee67f6d6ec194af --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1afe374dd1f0e9ed39399595832e1a42693d86461a0cc2aa343f6efea558d666 +size 16865 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f66cec8637c1b463b1832e6a0ccae1cf7d045798 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22084d005b728c440aa963af01a8cb2619b544ac4fc9cb68086b2eb4f0091737 +size 75180 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a065e46b5370cae6983c4b1bd7b1f8399aa1faa5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb0eb519fb1e091263b4950a13dd80205558c62dadcd3a16f125721bb12b1ed +size 27167 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2a6138895cedaacc84cf5e4fa4863319075fff5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c81922d61d5959abdcff21391a1903ed0a43a4de0656915dafb8726c44cd9c +size 45793 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d57d821abac1272e43a7326623834db92b1cef2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14ab906673d527bc9e713af6c6c57f8b3c0c744ff9b5b12399fa730c537a6a95 +size 44383 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43279b3e7b2498cdf2128de1e98f325af8e27ad5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542fae801a188c35aab275c1ca773147588ba7ba69cf0d0bced6f888a3670e04 +size 64085 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22761d15ce17397e0f61e43f1d78ba9ec3f6da04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f83e34c371f7617ad56686d58f15ada5b80b329a658349e2cf94950f563edb7 +size 82902 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27d6978275b91bc453c133d62598ecad02bd3ee9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b73e586c59546433bf5e5634384098e2cb6dad6e26d291dd5f28ec305c56c6e +size 36376 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b39292f780909e4677dabe93834874a18841832 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67d04bf7b73ea5445e8e00550571f855983512041b30e3d1b928ff971a4f4df +size 48728 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e51d45666e5bb017533e38d9b18f82769d759041 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb40c31305643238d2a5dcf729213ff3af8e026a7af89d9e9e281d982b8fe64 +size 68089 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab987feab1966d97dded0f4728382ba559ed7d96 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976de0a2b1b25c66d24d853d9ecb4338b762b08558ba8ad5de51b8008370f508 +size 33135 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e69dae5af648855786e440d22dee4069f2a8cf52 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9d9bd833ee34e4474d14eee9ec0a6734c191937999803aa009eaa63360197a +size 34985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a151b9400021ca7d691a8ccd3134c437e5162ed --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5cf40b0c5574006d9bbf55b901be7a10d3c9b43391f68b6aa9e8f3340a620ad +size 34772 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45cbc762957df03a2f0556c65396220bb90a6be8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87b2bc5a4aa81a3f78ddeddaa73c349c75ba2d1e61fc28c686e927070710555 +size 19096 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e4699ea3185fe12a9d6630ac6de102f0dea2aa8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165cd980d7dfc4b360c57f86b6bdd63615cc39bf0c3a88fd8b55e780579850fd +size 20812 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6323b5ad517361fe5e9e5af538641174c4b3fe86 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20a2dd0b85dc686d75ea753e775c1e35c20ce93aaa1474c04495dac1a50a5eed +size 13413 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac4e9f0e83bf5988067104d3d5bd0cfe61a18e04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbbdc48e6f2f08d53dd7e696994c5a94ae26728d17ed556b260e4da83f46d59 +size 33765 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f24e83f1f045e54d82ff6cf4fd4f9e3fe400d88c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c40b55495ca40ecd33af5a8abd4a4408784827fb3b092409a3306b7af2ae362 +size 42043 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acf2178d3f43dc09091b5b71c26c0793af7f41a9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9f14e38e5ec5374265d8e776a68266398dd1bc30cb01579e4cbb3f12e7b6b17 +size 59275 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5aa37512b6c9768c14adab5f4a144688b38d2ff8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5887da6350468f7408f873d1e7b718fe8cc098deb69e3ed65a77638b196317a7 +size 27153 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..015170ddd4527bf7655e171e639de2b8b2a7e342 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc80b23cf29a03154aea678178b3f9bdb05d874535d31474d22e975a3bb1e74f +size 14897 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d1317df3e7e3636eab215bde5e2e7fb0e1117c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837bbf878742404decdcf3b7a3c1ab5cf35cf46d9fac5cff41072ec1561e2455 +size 45243 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46721aaee54f99ae166bbf15d4c00d9ebee784c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764a6eb22131a08a4489690e2ba33573799d52053ec94e12b2945afa306c226f +size 54230 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f12ae407c91b234ed43715be7432bd655b965120 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eab563b4d3034e2419b57d93fec3ebb7e8c17351e630a081e9e788c4d0363bf +size 74212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e33690be5da2d3e1d9626db9ce9cc43154d09a05 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb852027fdfec75d266bd38921f1b0bd770bc13adc3e2dac56c95586458328a +size 34801 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f54069dbb70a19c62fdea4c08c0346e63b1a64b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0f1733217e4a7d647259d044a6a085596b7037fca614789be58b5765baaf63 +size 39145 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..444192c3264aa93dcc1bff4866a54e72def0088e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f00c06445cee447de2e609a528dac102ed1c2244a83122bbdd142a70462719f +size 82133 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_228/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d0f1e1fc41fb78a7a16284ac81e74b256ea09f1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c39b22e865bb98874c4d8b2bede979eca128dba86a0a019130a9108ddb8edecb +size 20981 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62306aff812ad4d23ffd09e2638e2f888b8fe8a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79ba19c4b1b4321723ca913c2302572b4078044a3c957b80a9961516b41e4f1 +size 17106 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cb1673f1b72260b02e541ef851e8c737da5d971 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a17517c9f504f8e7de1240fa59d28817d998c6a1486ac0c681bb29ceacc1ef44 +size 75626 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0a421212132e0276dded89dff8089d019df6ad7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b69f299329c0fbe8e1106fa682148d6feffc42b4bf7c286dd2bb9c072a79a0 +size 26766 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..610283defc0f858b4919f63fb6bf19da8303c2b2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9daf1195b0cdd785aa53b7c04582e21367682cf57790fda5e9babe939a8454f4 +size 42195 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..815a22593f356fa004334c4e74c13392911d4f68 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f704c5c9a5ceb5240cc81b66c2154f507547e144ed06e911cc1744af4d7deead +size 47911 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..251e8e2a66d812ed5a5f423f6dbac26d774aa473 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7d3ea43adc582c21a2061580844095b830e2c93b5176be92fdcf4270e9fc7a +size 63897 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f5092f8beecd53caf3fedb8425e52b5698e2ad8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e5b353deaff7e8af073855955f256506f96833b554762c8ec6130c42073448 +size 82819 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3d9be25c9290b1760842bda8f2d06344854b6ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a25ed1c999149174611053080ec4597de73ac96d5997bd62b2d955a0f2122b +size 36737 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8b0f5900a4a1a3d6c40359d8c930b3224616f3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf4d64296f2cc181a4404c7f1fbe3f02a41fd18b5c3e0950e3eb13d9721ddd1 +size 47680 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82552dd99bba8a3b518a00bb6a700a729ca8ddf1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e6a2ceb8782ed48d3cdfdfb510f115cd773647b5f47e1b7a0f8d71b4bf2050b +size 64601 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0a741ee49763a2f2813a24691487d396a06fb77 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0f4287994a54ae735810d9d712c3033c21e2cedd96775e7ff81655b263feb30 +size 33158 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fe88fffa942b1144bd2b0ae6113dbb97898fe9c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9d607f14918279f4fd31667c82454e66ae666d5416e33e08a99d2b87bcf551 +size 34475 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6697684bc5f557bbda69471928054c0586ada8aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c43b49c2c8d05c084068331d2cf49f56fe7a08ae1fae8414d724f4c2aa80dc +size 34093 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91181cd156808839dee287ec765c3c55db1bbe1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7524867183a8413f7902c9f6003053ef375418584dcc068b3fcc146df669008 +size 19170 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f1534e15f99ace350bff1b2a10af923281fe7c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:140b6ab669027b560aee493d0c92fbc1ce135c52051247f0e608e09c13dcdc0a +size 20872 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6848ec61b84f05b7344f0fc4ab4a00202d991b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6859ce51c49be5cdd649bf105ce4cc4487650856bfe29e108deb54eb076b7966 +size 13231 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..134ba2c791af4925c4dac23f96cc08db1f9b1d87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a6825e68ada316d27a73a2224695f2dd47d6496b0846c5d6d6473840ca0399 +size 33457 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a76daddf7dc6a95ac96a49ea624184a58df0136 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99dbb55fe1e18ff58a98e13977142e5b413b93804b7b3c2f4b9d8aacd619e544 +size 42168 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecdd1ab227b780ed3271a4732a5911685084abd5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d0a851fb6b77860e7415a75341ee196ac5d3e7728cf0cc0d17c43e8f69fec99 +size 58341 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4fe27e562e0475fdb15ffe9cf86ab8eeb492506 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d35b9d31417b1a40368a3bded3c2c29138829e4ed55e46e89a5e9b2118cc648 +size 27131 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4c2e7f0ba0c5a2c71890d18b416306060cb8105 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e42823fb4468fa20ee587df8027eaadfdd43bd098692f31b4a19baf0924226e +size 14789 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dee04bec878ad7cb7ec8995c9fcc7891777d2b87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b36af775baa204726fc9ae6d1b4bf7fb096cca757a7e18e5def0fa192b0e88f +size 45189 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fefd77119fe7b88baf2124391dd47b0a51eaaef0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebebf9771271bbc83afce48826880af41c709ec9eb43bd42b2634360ed24f33e +size 54206 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f7a0f27efdc21e3135faf922983c345387f8537 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ac4b80e01a6d306518ef16ae55f9141b13e9c3e4e5a108a9ba072c811f577d +size 74605 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f150856248c8b2ed4361c49267021944f165dc1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea76b7c419a2b9f70bbca0dd6085db86bceeadb2f9d1a09589264f124b031ac6 +size 35039 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00f7c4f66571f5aa54de1a89b5e3a9fbc6906a1b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c84311efd9feb2fb69ce6e6f62c1c81d99658056650ced4feec47ae44994e7 +size 39101 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..856560de382da589549fff655e0af1803cdbbfc2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dc762c248841af4c17cf3605ea7e44f82df4a0a383946f060710ed7e3d5d86d +size 81017 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_231/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7c430bd5b16403cc1d04980c53fd0af7745eb95 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2e1086806e59edc396ba3df74b1a717993f911905b4df44efcf19ecc70506fb +size 20988 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adab384a1a3ec39f44bf31da3add5efab9ee6fcf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b1ebd431db8506974dd44469ecc49b6370fa5b71a838351828aca955c9a7e4 +size 16917 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..958eb462e86cab5ecf907a927699c598a236b0dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48eb22c3f176bde050b3786cf7b5f3b36b2b270fd3c7e0f7f22a798905f9adb4 +size 75023 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cca00b7e127c7133902a5625c0dce088e6b7af65 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fd703b5e6725c69ab12a660b417e2cde6d486d7d4119cb1e91c715fe0743ee3 +size 26756 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba4f538d00bb554080dbe4a5b72f9b6d5b22da31 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db69daf7293baf422054265f703bd422fdb6432e2cb9b4c40160a37f626d8c1e +size 44633 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74aad3028e14e0b4812490bd22a5c83457ebc312 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9586cda3dfd3b95f200e5da010b06e60475c35cf6751bd35d36dafe17fd63c46 +size 45930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b7eb7ea3a505e0b198981834ce101021caffc1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8cbf351184b01b2d906eeeeecb9e72c852d1d4d808b4b567e52926cb497b7e +size 63659 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dffbb711dc45dd60b838ef338829fcf7f4fa69d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d899e65e07f769e627bee552d2d8161b5f5b0d761d2b94a18cdb83014e3141 +size 84876 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..023295bade15082bab9ba6d4231b7df32c49b87f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3de445f922d3378399c5604f7cebf31e905f0439d606795a46d9741d26b20c4f +size 37508 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e467b55c0ad5138a1edaa0d31be8b7c68d7a338 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f81941686c026104dfe765157904663fae9ba62500a0940ec9dce07cdfb5e88 +size 47148 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b578c424292e3aa50955021852e2a8b8fe45fa1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462b20c93fcdd681e8627ad1ef1f4b7f9455b6905cbecbe8a64844dad740836c +size 63969 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d733bda8048fa4b66f39a0412759d45e1a68a2c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cf7f3f2d1ddf07f491bafe23c661ede6d3057e4ae259b72957bc5d432e68d35 +size 33228 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22150e96fef30cca995a3999d55765f8bffc7567 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e68124084dc599ff3c46ae19c33970f56f5e5d0396f5f4cddd09a109b64d62 +size 35328 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..527bff4a824f6cf011006288296ab6dc87cc1528 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c9543611bea99bfb93fdf84bf604f2d888dcde69f48fb61ff9e53a082da8cd +size 34456 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01a0f57cba8cfa9bff09b00e2d78e247d90ea1c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac2281d9702f87131486626632369391ebb7b3958728bfc95bf825bca1560277 +size 19148 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04baea19a411312daccfb5e05fdda15ae2a645ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21fca987107393a278cfbfcc4d49c5abb7d3cf548c4f6d987e3ed304d27ec766 +size 20866 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f779a6356afb7a8047872842a6df879c0fd8ba1e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c0f43b4dbcc7ed094bfb4f32a469f91def1f71b52b9304cf79a8ea493fbf08 +size 13168 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3fa9eb2435a913f91e5dc83a28e255d311b63a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a9822eb90348c89a4c92ea2d4c878a67485a6b60d844547efa0a20586f4160 +size 33019 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc37c97dd52c7f0834041a096859e50ec73d7e3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5b88392ce503cadad78547786f68e9758f88c3c3efa1dd5308ea5f02e6ca5c +size 42275 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa0c86a6b5e4687fa2b7b99ae8d9e7c749f0edea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f59f248e74743b846963c6bf94b3b250794a3b361fa881820120d25cb4c29c8 +size 60522 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..658f2e8020bfcaa51b9e3beabe90ae5b791f72c1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ef57958808dbf39aec0a21eefc9115e952841c5f3148c25b76513d3e443b8c5 +size 26733 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f477ae7b856b6790c2ce5cdbddf6f7fe71f7dd32 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f91dc42056794bc677f81078e1357552b3c2ccda09694b542d98d75c913a8ed +size 14664 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16ce8c8cf99855422038ff042df878d1ed74e77d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677cbac12e1ca9af765b4ca292b7ee27b4188c93558653f56067df444a68c87e +size 45350 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e774cd401c1db8ec635df7ac7ada348ab5417ba7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b3c4a48aca5f746f922776c424c391a2b1192351f39512c1f012174e56145a +size 54284 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70211a23c52e3b7628f35a193304d9c26b4b71ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4762a1fbf013933b59cd0db6bed4bc59017cd87d60d7d212de049174284d4cff +size 74090 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..681b956383228bca3829ec1090f10505b62bcf61 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dcfa8542ad6852d753217f3ce9a55d099cf9db0be5eeba52ce262fa4b820d2e +size 35028 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6794af68d33275ba46650e054671ac29e1e4785 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53883d1fa80caf081cb0126bd0966087748975576875ad7c3f858328457a9a56 +size 39132 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f031d8c555061a90ef710c789f5fce1000e6a831 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a50e85d949cf8461a64ac653e6419450f019d92f1e6a8ff947426555727b09 +size 80241 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_234/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8808c35adc78b6f5c783b9cf281b09858b69f29e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3dd769044196d8b2d26c870b2d7299c06f1b2b2b7d50d2fdd75101992b418c5 +size 20973 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e65d7fd9ced625a3f93fa05a5ce418d3a84d1f0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91aafb3bc26ae47e10de44be7eb9fbbee52c4e5681fb86564dc5624cafccec25 +size 16894 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25684c5e3406b5abf47a2cf4ad774b56d3c7cc18 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c05a13e7f31940a9dd6204a5db6d47836b4d834c75665290ed76f29d48c9788a +size 75300 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..737a79b7d4ee37084a29a47ec868c41ae86a75b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e28072ac18edfee5d632654bba867e2a48ea6747b49a5ebcf982074afa26c1 +size 26834 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d8de8d1f3e961a4904196b0ef38302ae283de0e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937f5914163d2a0ae7fbe9c80d4f2d24a95a01452cb3a42f967eb1171096bd7e +size 42261 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e2b93711bfd2833ae57fcbd23957edfcc50cdf6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8b36fb75c42482ac8ebb0e3072a7ecb016d3d18789282b7a1310d834e124f2 +size 48141 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72b5dd42be455ed3ba9068ffbc93bf5eaf5387ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12984e8a5696d6a85314fa712622c0747bfcf855b1f4b2dda4811e913a83b9c +size 63409 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00fcbd913123f449b2817cb8c470a8489be486de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65eac262efa60d293579a165094faafc8832f805209633c7ac92d416d9aefa29 +size 76558 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..771e273110e558713b2cec3cff3b267694bbd6fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a2cdbbff76714d1c1d5388642ea796c428e3ce52664b4529a6e3607158e4426 +size 37209 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52515d8d897025b516c87c5324c21e8e9abf1c20 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156a947d36ba5ae60cd0655cc1b3c42f80ede969dc4ec560ecbdc87b196a3fe3 +size 47437 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..260d4d9e0da47bd2da865184b02d138b03644914 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59cfa186e7c7cafe8bb9845ff04508f36e2e73c4ee1b1e6d3ba997cadb9dcc9 +size 62858 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e21eaf63e296405fd081a8c680689916fbb0590 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aeb8aa8f3a0106d26e7651a3bd81bd76cbb12bc374bc030525d96e382c7a202 +size 33174 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a2cfc5a03c12f6b27c9994b87c2bb5b0a387ef2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e491fefb38dee9ce302a7a389a50a963468af09c960f626fe6b59614a7312a95 +size 35103 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a8fc4e1dbdbafb69c8aaaa9e405875c7d2a43ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:551dcac21f5ed5ce1e5c52af836cf8eb1e252173567b25da4097513970962dba +size 34300 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3305b28632e206033fbec30eb95937ad94f6c9a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69592c9add43cf6bb36704205341ec261b7c46d5e3c2cac151494259903680d5 +size 19327 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cddcbaa0a444a86853a5cccfc61eb271733b901 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b81f82cfe40029ebc2ae8fb7e265b5989afcc2d42d82e2e0204d95742754201 +size 20789 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..357bde72ed754d0b47ac7364bd238c33be280e9e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:061c4c70c2cdc56d5adb4d7da9444f22f1f39438639d14fc64262abe0f380bd4 +size 13293 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7df5433100091956cf98ee6eabd08b43362d8cd5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e87a155df40e7464dc766ae27976b20a14244ed2f0197738ec0ebb827cc9722d +size 33132 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4ba99d00dcc99765cb6bb7b8b0cd0c29e5aaa31 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e488946d685eea7b438d34e89c8be5dc3172b20447e7bc7ac2732e2fd447ce +size 42326 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..642565453e6d49a5c979c39c35784c8fc99a64fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96576040b455f4fa1c7a13c504225b7ab5a4c559821ce9e51b1080ef072cd811 +size 60408 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33a0ba7841878f2c579926df70904607184d3d1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b8a9d68ae803da8d5c331fd78eb4e7848bdb41a1c2fff6dd00b00d992b88a57 +size 27689 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c93bfeb1cf53c9fc85f6b8aba32183e9cb6dfcc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f15c5c321b365b0f9f16ee8b864bf01d95f83e899dcd2c58b7e1c40030033ea +size 14602 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38bf776cbe30aa7b408fae33a8cf482ccb58cb98 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95baf7e7b51e04a345c5ca423fd8167ad261d615c65bcddba21bee62a3b51cc6 +size 45096 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfed355055c11ad7da692a794c54f498c5aded05 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f94102623921928fc5e20220ad3fb1130f8ed9e7b16c950f07a2d66aaba8b4 +size 54213 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..079ebaa6f1af43445090d21012d8e0b76dc5eab2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c93a3a28f43951d021541ada9f7b0e04aa30e4f5ce8501612eb815754d6d3703 +size 74254 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1f5489dc91203edd4f0979f2c0339ff0c51250f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247ef61fb389a952a58c8a621d9937a8127f9681c155718f2ee01400f00bc291 +size 34877 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3906d03357adc0ce4d522b91bf5fb9a0e7494d64 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd480d59a36cf68e3692574475bce962e962675f67897bf9b5c89377254f0a4d +size 39082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..307c4881ba4f7febe35515ea788504ac77a93b1a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fad542df9e27290939b5016b34a5b0c8dccb1437f4ecdd66a87631b09a21cb8e +size 83140 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_237/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ea27a12214934824268bb10176306acaaf44e95 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c406f835361f0b955e575460db868c71ba8af60b50dd74a832c152e94feb6c +size 20989 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfe6bddc9467ff64e7414fa877648f986a783329 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd53d0db3bfe044fcc911cc21d227ea8f8de278da7a43b520b6aac05eb1aee4 +size 16096 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c4e5d21ccd5885f87122ef737dbcc164f5c09f8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93b3007032416cc171909bac94c3897abc1c9d97d5e2291dc304b02d9995a7f +size 75477 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ec36e7efcee9afd1beea461c962b2a665d28fe2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:123ec7daf32f0ba46664e9a24ca8b5f18a4fe32ab41c15d8d3b4fe3c6ac18c8f +size 26782 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a48ca30c9db8b1df0926e29ef6da8d2886858f73 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4f1ccb3398011d77987bffa05251253b5cf14195a872ec5759cae7c363d1bc +size 43232 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75157ba23bc85183f7160cfc75f9955fadcf3ea6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:939778326eacb304d0ed5821c340c560b280f41e90b45d87a69c8d2bf166792e +size 48148 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ec63a7b080d73db26d8f9e738c7a8e3ed287cae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a9e0f847bbccc2a68fcd7fb134161ec428f2294a7363b899abf9d230ce26fa5 +size 63560 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50ff46f1cdf9feb4094f1cf1e2f42b7a5413653f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48a479ef896ab934fd384bfe7000cb2c9eb926347a87c315196bf45253dcfbf +size 81128 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8b041999c3aa5e396e0eaf5b931e93b3405d413 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9febd1afe9c4c9b0a49370f60f28bcd0e5967f69ab6913eb4c6c0860f49f324d +size 36979 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f0a9665886568a030c0ed57d67369294f0e635d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2285a199751d29e088bc8f9013d769f6d2b18aacb9896f4a4c7780f6f2e8c680 +size 47901 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c4368836c769573d4e202cbff4bf931a519d7fb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32eb66819e31c2d2c7eb6a1a8045552527141619118102041fc62380469a0e2d +size 65847 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09a15f5af096e2a253905d9577212b212856a7ac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8109992d323efeb30fa6aa9e937fd5e26e6caf211c51bf18ff8b437ca76ecb9 +size 33260 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f26faf91dad107f175433214aca7f5a4a787623 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4d67e4c151d690a2e71cec7325cb0875374aeb2dc9939a1e5052c8a2fe97a7a +size 34609 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33ac952f2c673cad91a201d9c4ce4f6155bac1e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:667f97f39c0df7a643eda94d7187ef8e0c391c908d1defe1fbe3cba14f511334 +size 32167 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b62af5daa7c9948e7ce39c7026f2c16a2e0c7a78 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19dc7ff9b2b73b482b1598470823ee460ba8caa7572dc3181561861a260e9671 +size 18994 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74eb81f1be922dd3240ea77b25cd047e4951c058 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5180266e439d1808ae61e34d6f995678ea5e4829169c569cf76de140b6293590 +size 20892 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed4a8e8655f7568f2f2d9f534188d3d3b1b08eb7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec396d79047fb019cee2a23c887d801b281d10fc973494d6f4d43523ca15048a +size 13108 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9302e4106a189fb3fbc7f17b6faad2278d69d833 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f12c8771ea03b4df10e01ee67b4392c9e5d58851db374e0d41baec3dc960b4 +size 32909 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f16febe53322f1ac5685f90c7afa108ef8fb5a2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fc50e145bcff91ecf60ae5d9174ac2e7fab8bbf8da8d534c9df9746e1279df +size 42732 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..188f357b7db65f86fa74ff93dbf1e760c3122723 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f104d638f3b260480396b5c0f684ab45e56d4a9ada6d9e8e12af44864bf311 +size 60154 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6661cf666d369448e5f6f8f8c21277e453f8a2ad --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c02e95dfaf959d0bcc3f8259cc68f42f3c256a4bc84f172318c0092cf4ed8b0a +size 27547 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e5ff36f978c81edbbac433a2e0824177df99a54 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ccdc6548a18b0b1ab41e3296e0de3130a6810cc9ec211aec371b0614a6a331 +size 14603 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5289cd9bc442f576ce2d8597de08966db1c60cb7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1f5f3397a5536682f1844e5f71ed508e3d43fd8a65cc6910818ecc2caeea30 +size 45138 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dd4be2d936253fa9208e50c699ac3810f198370 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924aeb187809793e6467a0f1fb6da28c9faabf3bd5837017e86ad39eda472778 +size 53940 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..780113661688c673ed075490156f691018a8abef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2cd249417a3666f799623998b7b6cf7094ea88edea2b7887db6eb85b1a9d34 +size 73957 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..629decae367429bab32da805d2dcc900e307aba9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafe642fcf17a4bcf39a7600a1532a76b6faf4f39cdbe3a790c04fe0d7e7e266 +size 34864 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61cfa9835c1b77090cf3f71ae8d9cd8e36293ed5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38a7f483f458309076c43846fee103334394b568ea72629f0f35c54258fc7e7 +size 39089 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b37f26fd5ce6ad6d768a1fda3762b222d9d71630 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:416f8fb022776117dea325810d876a780074ae39f3834a2e75084446686f1630 +size 88631 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_240/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3390fe4cfd01259a5e1ef6274113b696ca9f1e01 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:558284a2ff39777e2a3f59d917c8ad43f833295d29cb15b6bf531f215a3043d2 +size 20960 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7472e01ca0ebd98d21482d105bf5e1a91d21cc16 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b01dbe8028a010c1a81bc39001cc9b98a5071fecc009ab88cdfee40c2a7d97b1 +size 16544 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e1174697355ddf7004dd82bb14b1ece19ca591e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15b6143a24e64b5df7bc9d96efaf173c444cb9e843a0d7e05eddbc52f75d8385 +size 75256 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfe916421d430cb84af47d969262108d86e0648a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa10ae8c31cc59e44b9ac0f7ff5368ac826bd2a4866086760361b775023b1677 +size 26794 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5faaf161bc16c051ed52a97fa8e5ccae90c500c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b8bb23a0be47f730678828d1bd199bd9d6a07a04ac1dcd77ecfe7a4a8794fc +size 44204 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c265dd253f2e2a58d146056da8382330796f6d77 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee106c59a7087e480531ad14a4d83817f3ca4fdbc18797184f50758a20a34917 +size 46811 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf13037e46bf10be6bbd7a0f7588d3e67e4c51ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e55ae54a596b8103fbf27d5cc9a4a4ff10eb5d5d8f177d4a3c544cef2caca6 +size 62886 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a2edd7a41975a481b5bb93cc61f62cd27336b63 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:febf96222a60142f0132fff1992db29e6cefdb40295d55d4d4902e58d4501b36 +size 83582 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e3ecba1604a4188162fd4d0d80d92b465a50e2b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b91d9e89c04f9a41865c63f6b5e09c7eef6a1e1093b5a7bf52d39401f26bfd +size 37200 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a48c1932938dc22264881988d3cfdb1cfa4d351e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94514814db76d326d31bff6598dc23bba21b2b3209531ef463612aa0dbaf6cec +size 47609 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24fe978e9d508d75aa8fc803d3f704963faeb17b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:568751531569203861ca1f30c3c451685a330bc644de7f7361162580c7501530 +size 68367 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70ce7fd13dc59b0d77b1cb2a6460870209a09224 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e643924c26809c5da99f2262ae709377b01442ecc6c5214d59d0f35867703815 +size 33485 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21ee41b1eb187c2715ea68fc84a76b2b612298fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd2a04b686afacc340f204e39c0f25c9799e2a38ec0e661ad4478e1384e3b23b +size 34953 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f953a91ed762868747bc5b7fcfc95370ad6a2e3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81eb0c3f435c079b1cce26674cc417bcd37cd5b14da84dfe1c48ed58cf98a464 +size 33627 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c13365b9913e1622176d2326b5b4f8b7e7ae5a8c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1758b587a5b9bc57ef23d9874b9a96e2b8b3ca32c1a20e185312c0c169b3b6 +size 18900 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7bbb3d0f402e3839613e65f35f4d4f84ebb874e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21a06555638ed5ddd7032f71e32bc540ae4df05e09d06ecea8442b6f15fad72 +size 20894 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db26c5d3dffc4cd1f70ca905893fc3403ec05426 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d592b542172c98f4b2f45177f8082d396cc8e4d29d4454bf78e9524d49cbebe +size 13527 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f5df138d9cfdcd35b60d0c6c970ddd1105d1a75 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699ca4eb77099b1be77808280f255b73f41fe3624590b5ab6df7becc5b99bc24 +size 33651 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b031b05a32810b679ba3b54f4c15a295332552e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcfbc295ef5da00ae729d65933b648c37d63e2f4eb7f74a7e84bb2a9c8ac234a +size 42263 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..032ad08a091eca1bbeb22212f3bb42efad65fc8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f1cf55cec2611c79c4b40e538aa35f6a02c269ff06579cf6ab193b7dbee9d3 +size 61316 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9914aca42e4f1aa4da0411557e502e9ec4bcec14 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3b298a9cfe804c059cd321c966e046f78d9a903b882702ef0aa7f487c8b91f4 +size 26977 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29ce11ac35f8cce4bb37545d69678a8eb8673e66 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5d4d9ae367d1893484873b3db22cbed415d01141896b74e137f8ba61b14d0f +size 14524 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2334b69792cafc44f56b4df32cc3f9f9983d68f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bb33afa5f8bb798ccbe32f1cd6a2210bd283407aea7bd7bf79b5400b4318d5b +size 45284 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4b0f4b6550034fd06e0f50973eeff49322a7761 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32416c698c5efbee78d010679450d918d340e7cecbe9d354d4d5850b90b36ecb +size 54085 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af91547d9c04b0f14f682c5c1990ad3518217c79 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4acf04c5c0c0fbf8f907c2ab4a17d76e8e72e9206bc1eeac109f0ba6cabb601 +size 74228 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3abee482f51eeff9a9e91da721dd29d35376ab91 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d274bfa8547e691c5f50701eef98bd1d601c31be81bb82ace35a961e8ca233 +size 34993 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..191b854928b261dd8b0af016b469b7f475eff569 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb7ebf1b4626aefc6e1b3427aab19610913daf27ee4597b971cbd5f8593ed3b +size 39095 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc932424e82e42ea5739370af642e5f58ca00b6f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0013070538636981424d71d79f38f1a572e1feb6ab2a7c334b7189e090e8ccb9 +size 84556 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_243/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a4069b10f6645b4c60d3709022e46bc4d30e118 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37391605cd004a09d9396100c0576156f7601366dbe59e7ac68b212719e5d525 +size 20974 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a72f2a7b44ded9ab4e82d63330fd177d5f834c51 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da8bffe398cc03efdf935ff182d3d5cd62179de0e682abb6323441f94a14851 +size 16474 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d16c501ac52722ea0622aa1dcbaa85ff43b0dbf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b81075b6eeeeae2eea14030d9a2eb7ad995e64f678a47eb9edd41d2664a2757 +size 75025 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..933e0cb2d3925cc5014362199f04f3d000977e9a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bbafdf546656f04debb4c908128e9aae9cedc05f4217761398e56bf754d27e1 +size 26948 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ca01ea339979b9d4430198dcf05a19e1bc2cd22 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eec026c96065d9b0c9ca319cbc28c820bfda738859b1a5a0859a884270b190d +size 42794 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..548e270b10307f9302c0b02c9a8e0e76b6101b01 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0f9cfb98b96740c08b33d48442d86353f523ce880b6f5d4a0574a37376a9b8 +size 43632 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dff482e21ecf8955d4dbe8a9f8d3709cc36cca35 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e731dcb417446b406c2609b1e415a1bb54f46874d41f7924875fdfef2ff87fa7 +size 64652 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b366280493e0e270c2a56850aa589c222809bab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e119c2d4986676275d1cefc10cf0196276f3046b6bfe22a84a806739a7069b1 +size 82667 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f047fe070cea14f1993f03138c2f3c4a90376e32 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6c0947b33afad039dc59c82af0dc26b772a918debca1856efc92bd489ccce60 +size 36485 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0de784f1be33feb179a4feb563aaf3873a8b6284 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a244e4e8e6bc9f7255daab7b81c8575fd2c93fbb6569cf0627e977bb81b9eb3 +size 48004 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b703ec34cc11b3275dd3fbe478479b50e40ddf6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0946ca8ea1dcbddb60289dfd3eb8b93fc3d19fdb7bf88036060647e7228e1b80 +size 66637 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0287005df01e76b25090c04ff92a1bb0b2a47317 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5de24170dead94647c1ec62ce9f3f8ddef32d8060ae5feaa7302602529c4f1d3 +size 33319 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9573dc7c214acfc80706e614d95999cb27b300a7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0013764fcbc0fa80de2e08d3895ca86b165078b416a96f9244847a646f4d864 +size 34667 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38b4436d3346d0ba792a1c468c288ad733688a12 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa5ca03b3d7caadb7d9242d1e64b369ddc4109e48ead7c1f005ed72a67323dbc +size 32910 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02d8520131aa46cfde8ebad4c5e476f4fb7deadc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fae52444a6edfcc7fea83767a635fea7bbf01597191a3f266f423aaa0e388d7 +size 19111 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58ab0f6167ed6047b44d49842b31f520fca4ba29 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72c394c50183175946399e4d6776c49dc8dae62c6edfc26b600787a71b51d833 +size 20818 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce2ed1c6e712c8f0433de3cfa8ce297a3ffec0f6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb32acfc8d9a3e797f0f699eff7e353aa64d5adc7951886c3332ee22e2c3c9a +size 13295 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..111411bd0c20b4d70ce70dac60732aa1f2eb0c17 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2d4281329748233a73a25b2f9a3dce27845cf21eadbcceeb3ca94ce04ab38e +size 33125 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0e43be95d34b1702efdbcd192f17fc81ba7d0f4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04cfcf5a99221a193bac468759128182a69d029656a260082bc8fb901ee9fccf +size 41921 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a98cc46981ad0dae6322ecf11fed6437a7c2579a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70f7029e3b5d3f45faaaeae854fadeb0f95e8facc705463d820e4016aa13b61 +size 61628 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b540984c23059b4618be718133c88ea51c0becc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad59f10bc76c13eaa724bcc59aa9f60cb376cd15d49a1620a0f57ee4f1b36950 +size 26077 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62a7fc3cedf05bbe49febad2a37c7887c41980ac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bcdd489431d228ebe5361e9f1c8081912b467c03c976488f517de81f7da242e +size 14613 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8268e9152a0eca704be034b9e3704935edb741a7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b927208ea4124e4d2f77eea9f0ff2768fe5750cbaf751880bf657741032946ee +size 45165 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d9244375d6faa0d28f1749b37865ac7b6c1a9f9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f024a0338f131064562ce9e32c2da66efa0e6975bb06f3ca3b85bad86951a0 +size 54252 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fbc1096b83d07b948d02533f297659896ddd918 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86cebca8ddac9590ccf028215c01462fb4e80f4b3187ec30665dcd2a11480f65 +size 74318 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72b7cfbca6018e4051abcd1fd63b8bf93e3b55e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab35d2e8789c741de3f3ba19cebe3379a043cb0329e55887b67dc27ad22a4f5 +size 34914 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7627920820317b7e06473e1dd7032c62bf6794cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea130823ef95703540c86a26cbdcd51d26292162beecb60df90dd485537a1a9d +size 39148 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d44e13cad5bba3f5d7c9c53031424d888273672 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb789d2cb59ef27122ef37bb6bc5055d03424dd2fb9878bd63ea6f85c367477c +size 87230 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_246/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f8753ae60e2b74aafb4550c1a6fffa6a2d82337 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d6fa945cdafb86ee84048f0254cb6ee541dbe3069354358fa81527b2969384 +size 20959 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb3080f03d8e05fa226d0691b4782f399b370507 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a747b9613da1be52f4861e695b184a6d91abcbf166885c4dc5662de3e50fa6e9 +size 16582 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98d3aa03bdfef28fcdea00ef4e2f7399a207dc79 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07057dc619e8ba6fde68fb6d2d6e5432c3a96568af46b415857e462b51a88c72 +size 75773 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78b07549366bdc10996d4d61ba3340eaa4927b9c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:decd23bc37f431997e48b467924075056249a4354f2014fb9148a6991edbcd5d +size 26625 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e35bc6640e18e5a0aa853d46168cd0c9d14576d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342898f935643ec9761921f38ef1915a907101fb0ed877dbd975085188f0c122 +size 43786 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..082c0797153bbca993986cd4ce2c6753d53b01d3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b28771ffb5293a58243b901eb7ab3cf54851a82516269218cc545509003bcf7 +size 47374 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b76a51f0a2f5f147541174299e8bd3c7c1d7b3a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7996b707eb11bf9630b768bbbb2e3b7dbaa54717f770688f7c9b1e7ac75b88eb +size 62961 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5ca9703499d557cccd87cf982a4b40298b70758 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2943ec627804084d1936a0f3a416e088f74a2db56d63eba76a4b15f37c501d2a +size 85809 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d3c00c3b6c79374f4e594da1a91a7c218af3f4c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f746f62011a06fc3df0dabad4a76afc82c927c047aa4a40dff6b5445fac5a31b +size 36489 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0046faa928f3d0116cd467582422bdc1f270ee4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3c566dbdeb4b411a1c54904815b8bd92387089b990e40d1352ae0d3875311b +size 49945 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30a425c692ba691d0845536783759c0eee3630a8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:360b7366380f0a184e64cf198d33a4838543bf6b546d3a59318cd9096505ece7 +size 72549 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..650ca5bf0a56681aca7e597b3053e89cf5d18d64 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61092dea6c1db8ff39159c5987c2a5f8cb7e51d01e608a9e5ff3e8d9c91e669b +size 33256 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f02c717c2213431330533d20f7c83399fdf1a403 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f323d420bca2448a6addf202f8941dd15e5dea42c0ed9f8c13b8fa315c13a5 +size 35031 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d11d749eb4ccd70144876fcde190a6bbe1d797b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3492d32dfef11c0ddf325bdd6b277b11ca2382a8aca98a2758eddea9f77d6d +size 33848 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d539c6e76183dc2acd49a3a506f7014a557b322f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f60d18811bcdee68957420ce541a13b1e8c2242e4eda02f6145e13ef1ed8f6f +size 18937 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58a68022d7cae788acb994361fa20815deda4b64 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0829a8b374ae90f84813cfa81d6ea870c1a89542306ae896eaa6862a1fd33503 +size 20776 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0299162760b83af9d64b5bdae26daf99695e8e97 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2139874f9cddf1a61c63b286563f38d38c42096f45e4906773e8dde70200b92a +size 13583 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7ec742cb20491a4c072ebcbe595971bb689b236 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7cdfab1e3e76cee6673aff58f75e4d1ba22b2fed204a0f36e9528491391b62d +size 33082 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8ac3c4a56a7ec39e00d71000ad784844da9fb43 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562d8021e72c3165ea0864db715482f5dc45722ae9da97fd024adb9070058e11 +size 43569 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03a2503d15fb8028d8e5b69044ef165c77f25437 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd5e8de1b2906dd5fd382c514bf2d02e12b8887b2a7bd50832e17dd365d4963 +size 60580 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cb0fd29ec78b22030d474de24676f4f1d07c8e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5550b3935ad17dfbc1c32d83897fd68fbd9accbf1d185be7d6dc643552ec1676 +size 27849 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03f60f1903024c6a040baf6b531163dde2ac9210 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfeb160c15019c0310a3a6a7ed9321f7521c91b845e8c46dd540b2f08e60ea50 +size 14651 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bd7c4f1197f1dece886e7ae5ab12a34f0d79eb9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589d154d65c4fa2e7266e0b4218eed5924f605c7c5dd7d34b4ea2f6431a5374a +size 45088 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..724f441085214111b8ce438560e223a12bb100d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69264857684c24722b12eba540ce6486c20736ff97e009e4eeb52ff770f20b3c +size 54151 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4a28f523d9625dc7725fb081176e8fb7fa97b31 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9538b3be5be1f7705129e4abf0db1a428900546cb14f36c52ed5c75f4d7751 +size 74420 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c349685d83de0b44b68356462f2f8385a30d889 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c870292f0b2981f72be309299e4504f084ca5f0b3a44d4b45f15d41e30d8632d +size 34836 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f176625a0255e43383d4bfbc2dbe32f28aaac58 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f175776b8a6c128b8da75e3c94aaecf7e77d8c15d8d2340f6dfb626748cd56d +size 39148 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b51470606ad919248f43949d9115313de68b3145 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49804478d7a7a4ffd802b8dcb92273119215ee9e1d683187042bd031e1169c66 +size 86567 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_249/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78e8118c295c41d575253a9ef88362f8bb247b88 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffabfcdc68cce52953b51c165ce8c16d77fc79c775815e2f9aa3dc453ce3c1df +size 20969 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e41433c2af58384a2857d6944820e54eacf79025 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379ee2f4252c18566e5a60a6d29948d92c16fb80b85e8d7e596e719d881d3a60 +size 16736 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42b706d01ee33771aa40e278dc2534c975f7ecf7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5a240e29d6e6676c3990115220b8a34a8afb353e94725c337e180557e28996 +size 75034 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abffb35633a97b511e70b81ce25a083cbbbf042d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf951ca8a8b68156f85558c5522dd1bc0e2bf7e6a310ce7731d6f1ee106a7d4 +size 26894 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d01c4cd5f981f9df8344734f1ee022178c64dae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf14341a1ed5ac70226fd34c4a328015f153033cb7340d13fba6c40115a931a +size 44940 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b50cddfb2dfbac4010f03e3c5e4fb5dfc9e38b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1698659eda8dbb4273d99fde636e9472b7de257e83e6714d99bc9523711b7320 +size 47127 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..553928592754e7707d7f5f958a2a17a8f02f0074 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3352bd0e649dd1415a212f53d3423b45ce798f805b5cb5d3f0b196ba23574aa +size 62953 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..073add5871d6ddcfab6c64b6112cfc8e590abf8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50dfb47d232108df270534739604501989dfeb0d911b13b7a4c3e929224d92c5 +size 80927 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..333be17412c799feb53b25e88178d7d5f894e31f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47592fc70d4e069d7c99a6f5c4548201e72ca9df7ca197890cad8579c49d8ddb +size 36455 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0dc034adb380bbe353911cf5f4e486604e8c2a48 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4955ac45188166ac139bc65fb47b088bcadb216e67e3ed7824ff0a1ccf8cf90 +size 49948 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b59cfb19372689a086ad29edf21abcb7570bfc9d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6d08c62b7d8a151d8f881fa906231258eaee298ed1cb22216cd6fe46c8aadf +size 68047 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e89369efe98dc6c4915d9bee582401798ad85362 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d044f8e90809d179bf3dbe05f7528e828da63a91292eec763ad0db3be793210b +size 33244 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..596ee53d2431c54d1c6654af22ab44324453a27d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a6e7bab851139f6068c756ad616a877639adf39d4ab623b891d39c33b509a3 +size 34901 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b61a9229026c2d2c781013a5499d9f836f0c06e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab891d3950b179f6b0cb3f643e1285778a42dbea8544630495a491862eb73ce +size 33832 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf120764600c0265870b7ae084aa4cf4102934e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0fc9727916b0134d5d10638b725b35ce402b56821c76278836390a28472d18 +size 19106 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54cfc4ad9d134ff55c2eaf675d5a75465bc09977 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f5d3a9b724e01baa256ea3e4f14f1f128fe6a845c88949287d9cc802a35b45 +size 20779 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39a7319bb2847c6c3c624dcd6eb9167d943fd94f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13875ab061836535c602fe0969977a85b646f0f53467781a63c24ecdf4727c98 +size 13744 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9971f176505755081721b70b104a72dddec3456 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2fbf08614eb723ba16f2a07f4692c3696500dfdcb0a3403b5f543b677de81ef +size 33359 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07a6f22c1d0ebed413c08ca5a93a040b318dece2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:255e8d0e66a4586ebdbffa1a23f2070cba03c175bdb988d660c79e8c2628004f +size 42671 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06484822cd1b5cd994a364c51c2c0a4acbb700a2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:599bd8aa6c7d0c432e9d787388782d67a6aaa351523f018f73407a32c6fb9a87 +size 60539 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fbee4d805028b220f327d31f7040f91feff956b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c772b913cbac3afa9f1cc6d4f769965abcadbc4cbe0845e51a3202f6f995249 +size 27450 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79fa1de065ffa71d1d5ef9f66a1badc5f11fb7ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b990c1e1fd518cbec2ce4a915345f6a81e48018f0e037b98ed65aa4af954bec +size 14593 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..028d3fb517f1b698e574367a6420959e0b707fca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3952a12451f590374f0a872fd278da7ccea651a8adadb1c91f6f52df278155e +size 45182 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e131fa7abb85a886a042f90c2e8545e76379ef8b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3a116254e465414d2c70d28b3d36114a94122ddd5e3638b06ba6c59b5e53f5 +size 54032 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c21007a1a9f0611c820f473d50fd1dfec9c27a67 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5c34470d057841bc1f00bed7b8515485bbc07e40b3570955e9b02a502c9aa5 +size 74101 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a46510fa74ca9dde770bc959326275daffb3ef20 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5104f4cb1a9f803a7ea323cb3d4bd7d5238c93a530c2393bc1db984e235f72d8 +size 34962 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c47a75edfb02412c5be27e1f34a50d490561dedd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfda64ad6e091f1bced79bd39dea0be56c65fc56882c8eeabdd2f3ca4d3b4497 +size 39125 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..720580814907c7ecd01c25f16c9a2fead2b093d3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e389f2b797c6c7d59be6a627fcd646349e7c709bb3c7b38350d97d0fdf9256 +size 86529 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_252/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de457b1c00a94d1b6b99b2194217129a994f9bef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a920a27df218bf559052994855a6d920e3603adb06ea7af4958a9965a8e32358 +size 21001 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..debc59863955be775c9a87c83e72de74965ee526 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b7e3b5a86f4e13b185642fdbba4f7f9715ec9189318ce4c0fe87185552ce23 +size 17149 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b094e40528574260484b996ae6a0a9ce7dfcfc86 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0fae5cbb9394edcd38447960d6342075ef31fe40d167b092679ab3f772cc07d +size 74883 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f55aca47323c2dd29d6aa18cc9106f98acc419e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f608edd8e26e9063029a261b700c9c3ab894b974e2128285c4201259ab0c108 +size 26765 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3defc9ba0c84d9dc2da5492092ef80f6c725d545 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5406d69a84acc1b99ebe820d139a3faacd4b196fd2419298c0caa1c48ad7938e +size 45215 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..366862eaf66d0316cdb9a81effb0847cf8ba7631 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:724b03f882eee52bd4b25d37c87ecd1d42198f6b7c4967b62e7033ee7ec95912 +size 47351 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d583142de0af29e5c13e602face36a5781e25e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a354b5c4ed53503ce16789e0b7f0e98b7d16a90c8ae14dd29c82988a02643940 +size 63725 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f281d580c49c640bf0664327d58dddf8885c65f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7323cb82731926302660060c4ee2ea8b161b1c5657dcc7b3d05fb650aae9b3c7 +size 83143 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aac3b6cb55413bbe2696dd25b890c517d4dbbabf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f123098ac62ad26ae5485d18a3adb93499748d6d55007e8f49d8993af419048 +size 36319 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2df4f0729d9f54a9c926bfc1ecc9698b128f25de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f696f4830e88aa60b4ebdbf46a03dc2b0cbd2ed500d0bf1a351e5412b9c8d243 +size 47369 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebfb286e93623137297994cb25d1c01a518cc40b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11935890f33a6c3da6959041f6e17c302cd98498e71d58590cfd473bc703f263 +size 68863 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ccdf1a5e028d8cce2fc71ec0fe21f8b726eea9d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898cfd8ec5827bad9523ad05c74db8da855bb8d1fc0ba0f06eeace68c5237031 +size 33179 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c14c9d170eb3b2a439aeea1dbfb7a9ba3100447 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61774e920c1d4080f3149d9672e8dc7fd8807c50b6404327e2320737c4cf5ed1 +size 36002 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20633fc8c2c0556523e2d55e2270c770fd5e735c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b07a068baa9726ef73c642f966e276b3474df31dc5e0562eaa3cb1d61fbcfd6 +size 34283 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8601cab8f031c7e81ca89581d28bcbb0ba1174e6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d88dcdae8e946b6bd13f6e63d0e646b0c3d26cb4b6f7144e41daa28acc7126bc +size 19562 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9372fd9bdd6d653ce1163f5d49eeeeed349eeb6f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e28bf2202452477cfee25e54adea4b37940949027f805ba1d0e25515e9d8ca +size 20774 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0501cd9a10aae3cc4ce5e7feece46e3fe7328e4d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7f3e6448f136fec9bb88d07b44238c771f028aaaedafe2777481bb9d3a82b1f +size 13584 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cdaa54b072d9f8e4d7d17fc24f33fa26c8f8bd6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad3b9825d2728405722edf2e26d22901001b7c1613a42589cb19261ce149eca +size 33128 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e366ee3aefcadadc33cf68ec86a255aee0578068 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ea530206da5f7c8a76e596866779782ab9c5519ace0ca770fc4f9700d54895 +size 42264 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..baf4ccfa4bb1dc2b9652d9c4d70f0634f76ad35d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b24acaef4567acbf2f7a848dfea3a952ede774dc4e4261e99eb1218bfbfeb5 +size 61293 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d054d33c4ae7a2efb41f662d73ad564ccc76fe3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7262288f7e525033386bb8af9116774d6995e2d6186250f529734ab996000b +size 27608 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49accba64b0f76b1f4baaa985acbe2cc30fe7562 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:281afc941756e0e4c0e58f98994dc3f45d7bbbb63403f34dae682808e6fdb8cd +size 14784 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3da5662949d0198b6016355c3ae1a823a56106e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc4f93f2009869fb33aa61c3a89114dde66956d3b029d99778368f529cf2a306 +size 45131 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9dda2b697381f8523391a141646d0af250dcb6c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4164be2ed6845617f7a2a7c9ff0578474a2bb72b6c8ee8723017d82ff5906460 +size 54125 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33b2ed5f4ab88c04ea5041cc79c02afe56afb000 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8c5e7edc152d8958c560748b651b2754fcd8e662766dc6860917659c18ca22 +size 74228 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9eaaba264e337d830850638fe413894dc758e0ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ce778ab2f8cb39603a63caf48b8f12ff2121555c99895ab28f988d0cd152431 +size 34776 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..419d07907e940de363aeb96ceab0bfd96f39a9c4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6239e3b963b51b5d9d12fe4c263870e5d157016a3b3ffb20b1986976a8286dd5 +size 39080 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c2783170e52150669c92616747ca3f71e311157 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c116f4ba0f19ef3cc4d07bcf28102b31ff9915b4c822d1e1a3f11702252bbe +size 87608 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_255/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5842bc31ff5c1b7b0f60eb652749c6e4a3dbc8fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc5498407cfe38df520d184d8bddbb61d74bbda46402de09fd1d3e4134db76a +size 20993 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e87bb14f7e0088cd1c40a6e94a50aa709f027d3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd970ca5b9fb32bf5a81f871ef2c5586a2b8b54ba3f11d243c7751aba3b01fe8 +size 18583 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ca5617edfab151eb17a5b33526c4c8238e247af --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3468bab91ed97a6ad01c6a7d06b9af2bcfbf7eaf3ecc1a773917d7ef2e22a149 +size 74642 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65917ee1c0f261e3228f0eb174db493dc9909c59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ef0f0e8ae63d2986aa0a0627def6b41c0ff2645f6eebcc68b70ec71a1557ca +size 26930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..971c7c87fdc486328e19a9fe58d667d4993f8731 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7c982844f9f247aa2dc205ef887e521c8b9d69a8fb3e7d32422b10dc6f987b2 +size 45476 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6678536996c954472ff9189342bdaeefa3fa22b2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6ef20e398c197e94ed6de5b74926032914457cb1f3e9708b9e45f6884fc124 +size 46379 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0046c25a5b889334e3bfacbc6f263f1de6f49da --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38a965a4d4ece3655b5389918bee4ea76a3898f4cb0bdf440212f5884e6601e +size 63823 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbbf769b86bdd8cc392eb256ace49b864c9292fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17bd900eb6b91a4f6e4adbed761c1f3e929c6b3f081dd0149819fe215f836d6e +size 86550 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bf74672b3c71ddcb41caf406550080c4b04100e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd69f9222ed0920cc0c12af77bf7b94cd444ea75dfb3fb038d56c53814d4ff9 +size 36372 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7047fec7afd16adff23150e88123527336549877 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e33844ad1b5836826a6ba7523ce911e6a33fc7dfe63c013b7943aae289908de +size 48354 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53d070bbbe60e2de8ea5ddc4758211ae10a27624 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b6c39d93f45e85e723036240b44637f8c6e65a2c344429d58ebf94688b149c +size 69525 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0d66cf13bdd7657e883d043db9f9e1308422e7c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94fdc8720e4069186ca6a9089a2e51f0f294a13ce0644dca1e7169b46ce6ae29 +size 33398 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3184c7bd9e07494746e880c105ce3692c6693a3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09aff5736d38dfdbba16deb2a58ee11bec9769a77fe3f6ab8511bdbf838f9106 +size 35232 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f382e7a6508606ff5aec79f975f1dc43746b23a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1a87e3c7443f22ea262a778389b633bf5f452cda0834363d6051eb2d028f55 +size 32538 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..085e80c68ed13c33871aca16fa6b9778b22e60bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eef17229975bc8bac3f94a30ed16415456aec93650dd285b16bd176112b4941 +size 19241 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b43d363a972a42963b20d9716efd20b3d17e23e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a8bd04e6b824cacc7137873b9f01a6d1f4c4d38e8a3d422c59928c9d78c27ea +size 20850 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8400b4b217c3d7185d05d4f66bfd47f4706928f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ecd16551a04754039fc7e47d96821526de034e6a15aca3394b8d22a3704efea +size 13462 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80b4073d7019dd059a7e7ba77c2b1f08b6400aa3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cfc84c9ce9914e9e7a11b567384df2f43b6bfe8c24ca63da94633f9fd00acfc +size 34280 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..258eb0b3739aaa3d1d3390fc084c51f185a68c27 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714fb721eef81cfc0150bd62518d0238724532a29ba09a81c76de6de6227f526 +size 42556 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08e1f0528f6af1dee7334853e8c152a3b8ccbfa4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ca448179da7e7f9d7252d043fd63e195e210e750ebff55ff52341eceb26a4d7 +size 61274 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b83d97931e161645d346fe54a39a802cd3997ab8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9ed0dd01ac417b50183b04b71d886b313a07c800346b05c4fb71e72a2c619d +size 27881 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9f64f63ad7e4b33b20a122687bda5bcd65020a8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2ead3e7777aa345fb354bf662c47405cc4f5612cc660eac9345e51c1e32a300 +size 14696 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56c5123b7037440403c52542f3720f747b27b239 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f7df8c5158bcfe61b34bd96247acda5779aff7c2325a28746307c7d290b282 +size 45099 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e29493fa0cd8221c56513f5f421761ff01a4b574 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23905df436d865803a199f0e147ee882cad375bc9cf8a65c6995fdb48ee6bbc5 +size 54057 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f2d32da51a4b979d11be7ff76a56a0a39907aef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f78629b74ab825b1f9f9330271217dffe517b364cfe46d1cedd1064b1c7d10 +size 74307 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88491f27cfc3448d3f504510a40c5083399b3edf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27bcef3086f9803d92eb0c6970aa4f65929c38347425742a37d5417c3f90dabf +size 34896 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ab2accc1832f7a5d4e7ad62aaf0c326bf3e49ea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26380fdf243a98e2e980e28a6e793733fbeff21aa54c58ae0db18523fbee8628 +size 39062 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce622c3548a555aa4dbc6151eafae97ac73fe4c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87825b6e40583503dfaa9fd6890377f6a6183bf98aee10419fd964d930a16c4 +size 86199 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_258/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a36f9a40344ae43f07609a23651da4fe7d6835bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990b4e975102f6c2053185bdce48df809f3e4b80ca5ef24887ef83effe3c3810 +size 20988 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..587e0ebcfe0465c13ad4f04c487181ec7556793b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2c24e17b9c607e77bd502a155f76e1af295c780b44fbd0f1134caa6a1f8292 +size 16756 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1efcda8f83feadd9973ca45c3c615c1f80dd1e4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42d3dc8f1bd6935fec1b342fd30f5945221e0936ce428fc7336fa62a0028a95 +size 75260 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf8b5a8efd98cbfe33365da3026738d85650e784 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5adddde03739856e965e64d6f27f9101f70daf783ce4eed4eee5f42e77875cee +size 26610 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6b98fe049050463161065ac76f2cee00bcfca14 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8736faa6ad44939d2b7486e4af710301913971c65d6042a99fd32c4b08100d81 +size 44624 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d8fb3816ccb7ed228fac9cfc0348990a8fc16c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:970c2ebf8b7708c8618e6b50b645eb58a4fe31797f6e5142b78a6cd55856b4f7 +size 49969 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54cbe91747ae21b00f9b76f918eedfa1826f5c88 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82490f2f2fff9d4a9a58340596c5a135a4568daf62ec9a87b10d08a91e43c37 +size 62933 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aeb0c6a80c8dfba5aaae4821176b98e6217dff5d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b1e4fa833614d68b68ffa38273c7ff5c0c2d3bdd83b64e5e2e6edecdd82731 +size 78353 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9650a847786a0f4097ec54f0537b33d25c98eff8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f11b7568207ac9cec7a5393412200e6ce08687033a042e780144a5b17c2c7cc +size 36650 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b05cc8b6af6b338f528269d05e3ca7067b90d78a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2082748960066048e1e73d896f16c99957505c3b0f8f0b60bf87fec174577e02 +size 47387 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43f9d279c944ebd81c13f1b723ec03429a81608f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf95c4c290d39a8ecfdd70a0e55093bdb99b8fc7c3ff9ef3d44c7f83921ef22 +size 64448 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e5769fe7d6f76bd7b6af862c73e41b8ee1223ed --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc68b07308a24f410b9fd423ad9874e75c89edeb7bc25bd59f7ef6c1d1751d7 +size 33218 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3f04b70d34e38f296dd909e3878345f69dd875a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23a9c82410f49a1d819454936457cfbd11e681ca5243560f178f670773b6fc2 +size 35410 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2990fb9b5f4bffd5bee32bdf70022f7cd2245c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb85577cbb0d16ffde6c89dcf6d1796cc640884249313eed1740b9704215cfbe +size 33746 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5affe1b3fa568d7290000296a16a43863b08e853 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56839c41ed7beeea1a604d1bbeff186a27446953a25b853909e4bf37e3cbac5 +size 19308 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e2580ecc82b255218bded9c7b7f56e514c573c6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31b89b9e843b29d606e07db24ddfc4dd9deb2d4fbba9dae63215b0deacc55b3 +size 20733 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ce66a3c240e0bab344276cf7f9a4d7c7c05c8f4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc44706bcb169f8d631e8abfe5f0d1ce3dd270f0092175ff2faeb51cf397991c +size 13143 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e04f800314a68ca1ec259c6aaf9acda45e219ed9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c0bf6615676f9879702778aa46ed9d8eaed9d93ec17b7ec2cacd6d34d170b3 +size 33108 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..712c15682288837779ad671be1796db6fd0a52f6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63620a00edd2d5847550e64a9d456b38b882573fa65909f3393abf26cf07136 +size 43002 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..671f443be646462231df776e1ad3495040ad5ff1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f49233639de89d5587cbde07a61f516d19f73ad3893e66e5ce6d1f610e56937 +size 60340 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cec1c6220e2fbd348c7ce5f0c6e7298ee00719e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f242295e0da7bf651295d4541178c71b85a5e0eeb710f4de72250add38ce6de5 +size 27979 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ac28d49b88e2ec510b20199f2ec8f75349d917e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2745cf05871735df8b47a37d3a0990b57b35b18b3729a26e4ac3c02d7c103555 +size 14812 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7eee0c1486fe3057548b4fea88fedb0e6b4b9570 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14f21bd9272ed54121e19a590ff65efd308784fefbbbf2d6b3b265344f669f8 +size 45153 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1abf1cc1ebd5c4240f05d1cf1c25b57dc1fb5b43 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55556615947665e4a18d9be86131c89a8fdc7da3eafb7cf0bc54fc19bf808e5b +size 54055 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..737726c4200ebd2c2dbb6e0274d8fcbaf49dec6e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45b97d7fb16a7a8f186f82811e80fedfd20a695e488089ba26189414ac93ba6 +size 74120 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07caa205bebe152ee9b755d7a01e00f0ca07b1f9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057f754e80e1b011782717cf1589676ae2461e06ed0148206dbe2de2549555ae +size 34919 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9459b461a436c0377955c527c248a4f6706298fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56ab5857b64e1451bee10b36b3833999a936bb271f129590c5574fe94ccc42c0 +size 39093 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14fb4055a307d795e1b77a5e35f84806dab40857 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4026e4150f166229592debb070b847323437934c037d616a00e2b9d82ab6e0 +size 87972 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_261/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2d38cb66776b30f197c7da0d83620b72e66c984 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37901aedd8c89f36ff6b99406808e9927aa84eb2cd2020e6ef951b8181efcb22 +size 20974 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04f350ab8327ca70ff1abbac67a1957e24b1bcf7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7d433afaab37df5f390d36cb740bb160ec18d7894b8a74c59c671b181255ac7 +size 17391 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6d5d33bec72cb404f01fb447ece8ca94271979c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e94926e088943a3bfe592798cbebf689c76db46095d82a79ec13eef5096b33 +size 74586 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e3d736e246b97e823ffe7dc42e3b44143b0875a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b1bbd55814c19cbdcf780192f4744161b39020b1728374e4dc0827dfe63921 +size 27094 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c5dce3615554b53370423f978a830c5dfe546c0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e75aed1586617957e1c7375de58e5b2ea5dde086d00f3ac5abd44df648bb5c0 +size 46118 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fc47551828ade10c38173a2505cdada04c6f24d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:705649d23d88cd1943ec03a64e3d692829a08ed81ccbb8c502f9ec31481f21b2 +size 47212 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cebc6712dc483350cbd098c960f64886ab387490 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e33589cdfebc8be294400ecafa4130068fb6939b277d82cc00d98773d1e3ba8 +size 63705 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ad9f73cffe2249a22b6a734f921f613eef1615f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:739101444104ae113029a62477c2032ed67d77c4db694cb922990dcaa5dfeb61 +size 83845 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5262bd2131cde85c30a045006b89e8437eba489d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a0c1e17139051d2d1e6f5b77dbaed462c238bd2fc403ec5b4ab15843ed92f8 +size 36793 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a6eab6301cdd1c85a7d0892c717d05fe8b429d7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b082b17329e9a71f01c98d11130ac7511af305154ac0e6331e6ca620bd2c7eb +size 47827 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccee4fa7fc92ac7268a3343a59d30191cdba7fe7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9f50338d96395840a664aea42ec8bd0c7af15468a60a2b8da10bf447ad5425 +size 65494 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fbfd4c8e89f34c55105c95c686d2096732e631f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3382d2c6a2296c5d98a54802d32518900d81d55c42828b50c83482b7b7580e +size 33256 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4152003a0d19998198b22f23b70ef2204d547d91 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120629eb27eb532a3e0f57b0371b9893939b90c51258b2c8b7b62447b16a9940 +size 35296 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ed0133cec30cf206df62d950038dfb11caaaa18 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b5df799eb14e8a5a475f5ff1612f7bbc25a42ab3968e55bfe61efd16697141 +size 34572 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4e88f693d7d454a88124dadd601533e62a4c3c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff5432cd23c4b861c414acaa3e7409eb07717fe44647b4a52fb446c8283586ab +size 19152 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..288b966155fa41984ce4183d1bd3b0b4416ec171 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff28009bd84d616e2d65f61d2223228201d27899146eee205a6c24f39c492b4 +size 20791 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ade614cf48a354bc0100b0fa73cbbb05994e1cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080d9536a0d9d38fe934c1ab52c8258103a116841a4b20b2b48f9e07c4925f4c +size 12930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53712f261f4c2a4b617fcb0e94f070fdc0204b87 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0b9b2d66ed2f8a69c72274c91254fa54b3a3350097f0137c712b4e9c1d277c +size 33377 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..618b4ca5a7762fdd2c20dc26d887420c33896545 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a6c90b9e3f32656bd0a71222b6e6d2b55b82c02888b0ba7fb95cd1aa0cd3a8 +size 42817 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17e430163282ba4dab89c6f61f05a287297fb138 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30ebf93fb914421372a93b4d3460bad20c5864084a6b97c96a7ce996a052081c +size 60346 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b3e5860dcb6e5ab3034e95d1ce552d0525188bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6428866fb6653685783c1c740b3204106d2510a449b4cd03372b3f117381999c +size 27369 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6fa85b3830947696cea4532eacba0b1019d1f7c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af66b153f73e3a91b84ed754671efc7d74d1b454d79d289ef637cac249fda12 +size 14719 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb58344b8884c7f87e0e594d603563fc2f0e93ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77acad87f5649affb60e7b6c33ca44c4d474fda3a2485075e51b55cb70a572c2 +size 45135 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..003e4ae18818594c9d8a814184e67e0fb6f61977 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52c563de62de216e373bfc0f917090698181fb883fc034615e3af1e3a383924 +size 53979 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a42c1b73c58bc56f44e741e3f0ebdf30cd08cec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f766216cc72306aa1c02a54677567a15215315ec4ea672595096ba2191ea5ee7 +size 74174 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bae43f80dd9418e6ba02ff1ac5eb515f05b5d61f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc914c1178164de437d2bcb570547d2088aa50a15c136c5c245a0c5bc29d057 +size 34886 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c45e6a0c9b7ead2d0d47323809cd2fd5502e07f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21212c92208c0e28ab9631a9d3a71965a8d5c5c2146dedca5acdd2c3d1c3813 +size 39109 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61c188df257457a4538cb9769bea1c1c0dec0244 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1610844af495ff34f0cce4880e922b0292beb58e178b5166ff307305e0631cc1 +size 85845 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_264/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d21b8349c4c589a359eac62aa9b003f4289ba1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:073fe7e85350e3baab466fa712261665c74073e8cea71fad8b04ebd6bc0cd028 +size 20968 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f82646a66ce2cfc6e0a1320d48d5fe5d0470f41 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d552130c922a72ee512f2926ef48f071412d9e099d79597fbc4c7a1a5933db8 +size 17538 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1e05e80e6d8f520260d1c21eb8077a5a9eafe7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d32a1176e6d6d3f67d76d85448cf369327ec8aa0765a7b588be4d90439ea3a7 +size 75023 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a90a4ccb6daa87103b039d89b19a32ec0f29e80d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9489728ce2495300d4c6f0323b2a50600fca31c544b1ade3a82cc68688b846 +size 26878 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..884bba556c102937afcdddde0cd3cacc445889fe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a503a6e360b465c25c1bdac2ea860b5f170dc6c3f3eef6890bf2d1758f9c776d +size 46035 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d5c7e5556cb1c0fe610d594375da4303ddf2367 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccdbc588d5afcaf0466524372b41f8f6421a324accb04c64963aca6a12fa560b +size 48032 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..593c1b98f3b1c83ca2fdfcf8a85e027712e1199f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32da7fa003be17e30938aee2fbdfd8397fa89c6f22bc8026be0f9917f184ccf +size 63404 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..436c4d558c0bb5b1c46389ad0d05bcc1685a4893 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e4853b23c4f28038c6d05b72c3e0e505d3f9ad1c5bebf45a53eae452e04f99f +size 89097 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4ad3c59dc655005ba07f16b5937119da2f522c6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d266c0fc42b1ac31c4de85f02bc0c921f2ec9a30d940d7de84161524a4a12f +size 36438 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..199a35b57fbf7f957748ff678c2ad4a7176d0351 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81f628e1122ae1f6999e13e6f2761f18f6bab6c32f0dc1633cc8a136903c9dd +size 48033 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d4d1f42c597c73f08e870c22ac3da19e5ffef8e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e95e5cf608109685d963e2d9f600aba51753c610b9cf99645e7272dc28cd32f +size 64911 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ddd7e757f50bdf378ec702e8ffa617bf8626af1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bfbc7059af94201c736792fb4f5df31c34224d2d32a0c533922825a36f0cdb0 +size 33324 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0264443cfbd7d49e6e1cb9c878b97c9ec96672cd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d53876713a055287c7b23b2a02b4d4d8941e49122a284994143245949e7244ad +size 34755 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d7e34ae2b1d696551a800b1765790fd3be19d1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f711d9765782b866a4f78c6f669090cf56c2c83184c82a71dac8c68691a969a +size 34676 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd3e3622fbd080da4adb0a32909d2f6d924e72c8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c86082391d28c564b9c1f64d25db7e903096aa62809053c52846dd35799b707 +size 19318 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..905dc158a261b1ced994b171b909d528651630ce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a46a0d85b5f5b172279904fbed4b5a5f284eb852495ec8bdc04487c9d7701e +size 20727 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9634605c39e6dcce4cb8575f276df7eb5a44e35 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93dc273482cc71796e2da5e61268bbd12d07e575f72adb87a645740914b38a91 +size 13233 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2e55f5bce7b7a9f92a5e5cb439d32526fae9c31 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1503819f39d8227e3c2db1265c2f1296686ab2a94a7ed34cea0eb30876852511 +size 34250 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9aeb6798cb3c0a461d1484374331adde5256c738 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3323c343d848c137c65a6c9f403b0d90d89302ec0804a664c201a49d4e309ff4 +size 42377 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d701e265bf4440f0e1bbef7cb872e729dff97d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41bfbcd379bca9c6561d061c772245089bca58b02e173521d03c345b9857e16 +size 59981 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d8f34c1ad56ef811f4cfb5a5bcdeaf41de75ab4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32290aac7b7e10ded9f66d5deb67423afac0c163c37738fbaafe2d47b49e29a1 +size 27809 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7dace88c8b63ba83ff3b6006da46b1dffd9095a8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f28c9387f70afa033d645ce6d8cc5831b7c181aef0074d0e499eebe6c8a1bb0 +size 14714 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbfdc7675af28b7f0e8beceb018067c5cb0b75f5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4804886812951d6913914cfeb31e3cde962ed18f18449038de191b06b8e34c6 +size 45137 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f505d58664461530cc51cae875dbcadf7be08cc6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:465d0c7e953cd231c68b9533d7f20e8d2d405a09e9de3dabf1f7434b4208fbf0 +size 54176 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f399ebad602c86b868c4219a52de621350428e6d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaef612c9fb31bcc1e33a2a504d5315171c95b2ea311eb7c4acdcfed3e10034f +size 74381 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58c01a3c572e132116c0a5e1400aea68cf771550 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57477728a607658174cec0240c423ba0348cc0e6c995a64b1b5d1be29176d1f +size 35008 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d17f2b5827bdb363c614cae90eeb29f9d8bdf999 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4bf0226b4544860982c10ff9c97c9e156a7b3ec1f1d28b4b320d9384657303a +size 39163 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44a0e12959ca93313f29adf31c01c91607f5657b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606a51ff27cb25bbdecaf194443e47ee3fa46c59c188c432f107c3718a4aa4ad +size 89123 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_267/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f84277e8243caba4a799beb6aa4bf0997ee35d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95f5f3f115858d771ad1f01d33c967f3c0e748966161bfda7809258286ce5f4 +size 21000 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62546114c877bbf311f1d36077ca3bfe204a51c8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd47aaefc5f75df3149a46545ec35c013b85bbcdb2585305ccf5639c7598bb29 +size 16653 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b7c68eb4abd41f6e08bce4bf2726ad7c1e86bd9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653ab1c4e1875fc7d5a8c6131813fe924cead8171a73726c3eebf932d46cc449 +size 75337 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..538ecdeadf86b45c3fd63755dcf24a2368ecc2d0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a6d019edfd1e4c273f806e39007cc69639f99bacfc08b6864e07c9fa5940678 +size 26708 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6711fc65f2714ad5b57b515d33fa50a74b5cbfd3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12128dfed4c8e8999c24f67356fe31e1a90969862125971dbef854a13e24cbd5 +size 43902 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..880c8012693b0e737886db5dffc90acbffe4efcc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea5dd64a292772d5abf4ef70d12aec95f9311b3cdfbea22aa2a869ac29d06f2a +size 45519 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42f0212aae5c19efd31ddf0f60926b28e8321e07 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db06072a7eecd8c22dca47387d60a93f97798520214075715ec14d0f6dccfc6 +size 64075 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0cf16711aa1a9e4ae8741129bdb41cd607e5777 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca0c4f43b628bf6bac20df7224200954756fcd7a5de33e21f9ef5afdc0f7fbe +size 84842 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57ffee628a01134f2a4b34f0c5b5caa1da50d064 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a417a24c73841f1f39bd6e0a846ce0140bf29948f06f40569b3c7985ce8b6c4 +size 36531 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf34962bf8b238921f1657b9920e4636b0e2a8f8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39ec43291a773d1410c22ea21315db12d4a136445abcd9241cacc735a535cf9 +size 48233 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e8490c678cc727001ca8be0dcacc00670bbfc6b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc60c638549e50e99adef1e231b9ee60e1ad8b2d394973b6a50768c7dc30a58 +size 66161 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af35d49fab48db47615fc2a04d5c589f429f6107 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2804dd7a4bf606255c00f092c4beb3383b6808c7d5013ce4436473eeb524ec17 +size 33163 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a055a5214f15943787fdca47be95ca3cb5e8f833 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19982934e44fe0bf479f0b3c9421bc6718ed417421049777def991fd9d04cde +size 34944 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b10b940b7c43c3ea806213a5fd0186fb8f1711f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd21568bba3de6e6084146b3c719ee089bff41a94274196352065262304ba500 +size 34297 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67be42eb0b69a1e6056054a101d8f0306b5d316f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9dd35f4fc34f7d37ecae4ef1e57242e029f7e0108aadcafa41cf235e8d7d99d +size 18956 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1630f0d5e86ee60cad9c9fd842f497401037ff6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2bc27970812c95c3ff415eeb1bbaf1d0ad970eff96671c33a817708e795bc36 +size 20770 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c8f9d247b4cc6133191d10c46762ab91fc26c17 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3995e1ebd2d199d4af2241478492ebbf8a75bded80d791986272188583acbe99 +size 13358 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b6fbdf0c9a8021ba8f4802264c6e9ad3c62a237 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c61eb42efc233e4334e90e83954875ee3751c14afcecc5d850fb5a91805601b +size 33806 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71db6cb2682e33eee01efa364b33bdc2c80acfb2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9d1c520889e7e6543d6aa9cab7e0786d6e3ad9ad71ee9a17939c92f9af054c +size 42727 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c3f4be412e6b16c97263b7f7a5ccfdbbcd97faa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4bfb038faa841ef724c4a40f91bce5ca9c47de7f5c563da28c4676598941b9c +size 61362 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ebec0096d9ae7e577171f6c21a83f9ce0574303 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5b3b28a1124b804f12da9fdcf2539d76498b401794fd183516fb12c5478556 +size 27667 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9dacfd3e4674ddf36c7fa2b8000902a12b6f3575 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd05e4b9c927abf060f3f316fe4f81e1f38141a698779386eb03d86a0483fab +size 14742 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97816c402cd9d8176f9a7a63b454585eb965c7d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3d90d31507199556c08191265aef6b82f97db617cb9d260500e4c25c0e03c9 +size 45134 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d37751097a73fe43996e350e33afaec237deec3f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f58ce1591fc476f9a61eda01c0950a4a1d986ebc46b2802268059aac7d68a009 +size 54040 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53906eaf201d79ded0668cca8c0fe967e41b0e2c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ff093bb0a76c973bfabbf795097cc7e43b7e0b79503165c3c69d1eb1fa2f77 +size 74329 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0dd2ec83799cf31e42c87b7a404aa75dc200864a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3cdd621dc2bb9526d4e9834a2597dfe28d0ec5a0cdac309fdc2d3dd88656984 +size 34850 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6392bc139fbd2087d05262234d34f28d69b6d6e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df08ca5a39d5b2978d515d5f5217dea8e35eef6e7289f68d35eb65eaed1b4df2 +size 39118 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd54d55a44582b7443ad9dc53771de2e3e69a1a0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2a693ee6a96f79c89fb5d6a90dfec643d6bda690910704083c833034af37a4 +size 83731 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_270/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..201031478f9ab5eaded60b5027cf96ec22253f86 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e183eb5c7b04f9db7701e171b4112122034135ee60f0f125128e3a96a56285a +size 20952 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..761033984dcfed9148c847a4c1b2508afc6862e5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b7db8f71eb29f108d1545221e2eb4c7f8a07ae511bfd6793b914e09dc790c76 +size 16449 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3efcfede6eded866ebc0201114c474b15bafe73f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a527fe539dff06b3f7d3859c713f92ee2198b62f8e138a1db0e6920718bc879 +size 74992 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72f43087c4ed04932797b8794366e952a253871b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3f140423898479c0cfd58787ff1a7f8204a6d4dda11b0ea1e6a52b08019c18 +size 26844 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64180dbeb5913feadd6f3e91e8188e18304f36d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c3fbc61005cea3b193c06a49dea5e95911bcd0692b8b37582dd7f0c9d5f0915 +size 45929 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f42019f93fc4605d81c9c55ff2684f810cf1fa0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d459d226dfe810509693e8d425d3cd1380386268be557a1b8f00ea7d805a6bd +size 44669 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1812d8501e486f3739bc17cf20ad0b6dd0454ab8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc8cc4b9c5e8328edd4389d49600729dd83bbebb028f80c45ff95f48c8de3db0 +size 63639 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70864b8322e5a2244a0fd3a116e6aa84ace4ab7e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399908f793133faaacf0505ea216c466e9f969eb37ce33f020e26ed893622877 +size 73852 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cf0f92c02c8e8212f7838a5323c61da9d575f61 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc001812b13f62b52c181fcae18f0a39fde49312f462b934b55b37357f376941 +size 36424 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54c85697232e62354b9ef58c8164c8dc58de3267 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87e30bea36a69383d7af655b1a82aba2ce6017a4155beffe1cf06f01be60aea1 +size 47574 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9ee7e7e3335ea59dbe28665b1534794a3cad7c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdfb43fb4c501835abcc230b8433d7871ba95e06836958299b51ddfb5847b800 +size 63303 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2463ca36c62b9c5c5d73baeff3c115dcaf656db5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9cc8571a788fc81d82bc6eef8a0094b49550d6e795e87d4e32d485f9e63ad2d +size 33267 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f27dfd5a9a300ee5f07387c1b91987fefee0bc7c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2404860359a177b631fc0ca02983562da37cec9444a52c6cb3b7e276c5346880 +size 35059 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af9af18e132f3a811d6bdff09e1f9170007ad888 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d86e05887461b4099fbea4ac552306a2a367ac6071cdd1f29a7ffe91eee89a +size 33624 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e485009298527b6e9fbd055cd0785bbbed6fac3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6268e7b2996d83c47db8e332b9098b456992a0ade2d9e1a46e8ef875e7f11a +size 18961 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cb6d34cd4c1cc488c7481d668e4e4ac18b09c08 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e363598414926c4e2b944c41881eb5c6af1a29df370facfc8b455eca7bfa7b49 +size 20778 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea06f0d1167b1b97fa30a8ddac52fb9c5cc4cc52 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e66afc626aa282452cfc36955ba416c5fdb781d9de168d8ad67bbf68963b650 +size 13480 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fadfd27dc662a40a5abb9918b4531923062e425 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7cc0ebc9d5614560ad92e9b73703a050b6655e6e574ab53d1c8016d704df754 +size 33503 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7df8b822877321a7ee21ccbccfd459db30730990 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfcb5a890b58a6b17452482242f60ce09d77318e3317f0a34270cf389f64ef8a +size 43209 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03de494fe16bd4ec3759b3ddc2684f1f1f9c5729 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46e144b3f8bf62ebcc52313e111d5247831ccd7960ea814e780ca8a31a7bc9f +size 61221 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1f7fddce92652f07ca6226d3613213e625cd4bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c1eb5a6e0393bcb38d3e228ca5dcb92105eee0ba157714f3590b38e744f2aa9 +size 27146 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..815cccd9a64e99111c0c1c6664f41a91f1027d76 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb95ee31fb8b0f1394624fa76b954b8ce287a0499f94a06d221abd1fa20eb8 +size 14829 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abfea80e16db807ff627f48fed877961e2dbb033 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00cfe6fbc4fe08cda0e744c74c5411a9f696c840dbbc64c583b996e02535bd3 +size 45085 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d1b0860a27e540ea5c8e2c71eeb660dab4aafb0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e4e56ab8c41e2382686732b6e3a2785ac90381914d5c6dd9d023207d61c686 +size 54033 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e48e8d00bca0b6be9df7188b17b72d919afd511 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5d09bcc27901cac2c77d00c122e0faea361b282ffc0808d7bef148c17c0b61 +size 74140 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4daea5bf9983c819f4acc26b22bf29312bda605 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d043e8c5f5855b4a9f54bdfaa3e5d08a7928fd85ff25f7f50eb00316d409a3de +size 34856 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c66595950b2346b5722a829ee594bb77dc83740 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0505d241d21ab2f9a53736a24a5399f35d6889514119500d34c43d2a4b0b7acc +size 39097 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d952e424d57091344ded6abcb30830890e09206 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b6a7edc472902788197e4651d921554b247bdd59149315d31fae45f4560c0e +size 84128 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_273/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d165b6e96811fbf6d8ab660b8d81a785f1580a10 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2617441fe2cc18bdf68f6df56a8f1a99d57d387b9130029e263b6111d5e29ef2 +size 20987 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3f1215f42f54ed8b0435298619c9b8690dd4e3c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ba6db968b4ec521607a8bfcb6f99ae7e2be38aaa207c3228c625fa73436eb04 +size 18653 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..307cfd9ca943f4c81e486c054592b116a91f61d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52f08d20acbcb040c12ca6fc5dd0c3ff297cb1dffb693652e62c8a95b35de74c +size 74437 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..365bb462fbcba8b69696884191140ea5fa6584ae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7ed1f6a2a8f751968f497a032be9e7e6c0b3ffdfc50f8d92ec595169bdbdac +size 26834 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fedccc92782f174b7f8ce1668d60c41ff8eca7a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbe250050f704828b47ba8af7b897f59cf303ac56165608b093fe80169e194a +size 45569 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1037a81ecb84b16bbc12c2842c365de1f712bef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1225928e2b2c462096d7889859898f04422d3e24a9db5b144839ae2e75ed4081 +size 43071 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..145ebf5bbd9b7362fa75608f6b992d37fb486541 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0040c79290f4ae585707b23206c51a3b539e7f55a6791a73fe2a7dde0f9fff4 +size 64126 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d586b720c02064a169cdfed5ad38aa91741b0efe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490b8ad1091898f23ff80087bb477a9acd118f837238ba0e090377e6c65d0a2a +size 81164 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94eb20516305f4dafdf48abeb25a3459921b536c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc08b2696461b12270eb276fccaf3aa61657c80c582fbd7dbf9ca2d099473eb +size 36592 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f81e9947088f6f62a762411117aa62d3e0ddde7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4b063bc4211897f749b3fbd741aa1b4786ced7cdbc04dd795f3b8bb326980d +size 47947 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3c0f63f3b4389f9d123680f7bff7c1e0c1bb8ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77805089f136b19b7b7c20df4ecd0a74805bc6ba2c0136dd4e9f8218b85779a7 +size 66109 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..845b10d055320bd257e411e06208916ddb022626 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba364c52fc40f3bce9d15fb69e8182669316dd2cb27daecc90ad62cb678f2e0f +size 33385 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71c11def189e75c1d3c92721985812b97b6c21b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69785a57b25144d71f7adc6ef465bed3b73cd3cb1a4553b8d93a59a2b3ef75cc +size 35057 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb80080d69c387ddd60c14531e30ce83bfc6ae43 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b66307e6049e08f694290a493d476d6f72a5603d20cf22059c0d234b22c8d7c +size 34407 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b65134b82b07223c1f5725b37ada716506edc8b2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40c3b469f150d62f0fb12330a5f030e4a6e5ab756979211be723235a55baaac +size 19286 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9635f473c77da0b4794a884d1b8808b9d3eccbfe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b81be0514db9336ae171f2267e523056dc7730c2cee18e8144945ca92d9bddb5 +size 20839 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c6736259a6a7a776a27fcd13c8a30633c4ebedf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1c712b1175686db7618c90193f10a0799165c27dddf6b70fef4f4a53f204f4 +size 13461 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c26bf535846ad0242feac8a89bcf4221ecd88f89 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67dc6ea8afc03048f79f9903eb806ae8009293794d6bd0b2fda99a210fbc89a +size 33991 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc04cbe30e48624324f05c45a7ed95751ab98e1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5f5095b929ffc0a6faeddcc62278888578008026514a0fe7571ed86ff1fb6f5 +size 43184 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f52ffc4dff467f7c5ed525557a01c92cf141be24 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3719341678ab47be4e8a0c201bea02a1b248ba460ac92505281fb9c366150f2 +size 59515 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b410517392b1c5f48797aac3379683113d5e5192 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6db22d28f507c466f5c0ab16b6ca8a632c870b337f3509337338ed4e82fb8d86 +size 27464 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be5009b1995f4802cc9c7f59ac8a29e025e8127e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1178e03970b1c504188e6c4d49fbff95a46ad6f708122c065a497f62c98ee965 +size 14634 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9296f3f86d618f9e83ab9952b2e32b063aa3118d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba1101d66fcd4e02d4a8454bebf1ae6ade800589fc6805e4fa5eb66c48f5280 +size 44780 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..871492e0236b972d7d01d59226694cd80deccf5b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4625319813eab081edc301a8dc570496b15a0c308501c66f319703a29e71b1c +size 54023 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2db38406902ec5236e3558cfde33310c4a2c15b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b121b2f8e56d4ad8df3b9fd8c31d82b785f9c99b7eb9a4779e142abea227944 +size 74280 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5611c347aed2a432fd818b993b8e762b3c941b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b99960e1a48c0060ff8526ade63e4b56cc4b225299c639148824a8534115b116 +size 34761 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..176e65589f06056bcee299e85c4d472a31810dff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39009cac412d902947c69e398b1e9a98b9d6042f9c508e961db86bb4710c097 +size 39109 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b975b1bf840b97d8667095d54f447ba37e929b96 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c3671565ac5518be5298a8f20b6c26624aa3f684129f06b563f821007340e6 +size 85307 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_276/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c00ae269b33a462f1f17dc88a99984f60b9a5c0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24cdfd8c8d7a32a9299c4e877486b186b351ef59d168e391b411596239a76758 +size 20984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bdd434b74d52c0fed28f4f03a52c69a6b2e7d78 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3025daef218adc06dad030a2639e218cb40b7f0a1f522da5bf7f4ad1e5e4bc +size 18573 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12082510f1eceeabd28667e69527db65236fc0df --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18425a6bceecaeb2cc31d858207d57882c27c8a52b04f52cf055010402762f61 +size 74642 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3979c9ff2483ec5bae2722065bdc4248bad1bfc1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ce5e277217d0bd04bc9a122c90a043a80c385261cbe2afad297563d8e52db7 +size 26669 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e32c84630d137b65c6279e6f37428243c7c0635 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a647e9f9fa252938b7fabaa1113671bf47bba48ba957887e8b0fda76b41fc0e +size 44394 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..800929bbfa785613d6bea174f3eb3d4ed6c195fd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea024391506cbec7ffa9dc31ddb127742d00e4e5a7d8969a85643170205a590 +size 42646 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a55bb5f0f919a414ce818f5de373241dfed13d5c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa82bf51d0ed0a1b685cb0639cd4852043aa5d3a1cb2908fcac23304ca26dc53 +size 64016 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89bbf62545b96645fdd668241184506177d01d21 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982c4fa2d5f3b35c5ecd600b9a3cff8d3b1e81ce6b62ffaf9002fddef7537e19 +size 83892 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3345da142cea332a730a6b445a2e9d56d7c939ba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d0873feead6e284dec78da70836e79d1a779eff80da7d3e34a36e831a359e9 +size 36633 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9613825c964d47bd26abe744eb82bc4c2533806d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f046095b21843f21f878fc14b9a5335c8e4681ab61073b629f23d109276013d3 +size 48269 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..040c81900f05ee9722e888ca26e622632362f074 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c272d1223e4bce7d1bec9dc5a27640980ebcfb398001414ae1344304e2a65b +size 69022 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84fd29c450ea27bf2995605c05bc05c3a44ba214 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44872286f683276a973a4e37678725cb23245a451240c99f98301a94044a76ac +size 33253 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce743ae875378816dc1ed89e81a79c41f0a5625c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fd193bdae2796a929e4ed719de1e9b9e48650a3f829e9ef8cf1c068985edd73 +size 35417 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa3c764fe96483ecf27e0936256dbe621073355f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12987fd9a17a25a7380d87d67bf849dbac0c67718584e4f081bf1c474abe5258 +size 34482 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e74337ae2657f7266abfe446f83d13376253a51c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3e160b7984562d300d17d90cf85c212c7d7155a0bff478b984c63afcf53570d +size 19474 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea0d406003e1d9aacabf3ee7f1af9abb69c77713 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8bc894021e07a1f964efec38575ce9e9413821a82ef21a8dcd08d80d561fc32 +size 20756 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..513bb10b0dd0d6d1997e0676036bbe20747df882 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f35d0a6cfc8973733b20fd5673e7b0b018a31f750b7f61482ecc4b347b77e3ea +size 13554 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d9323dea19cf982dc08e75301d9a7aae6bf6c8c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc23195cf501c99f2f123874fb773aac69330e6915f5ab863f67a730c1a49b9d +size 33992 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d828bb8dc3ab1cd74327b0ce7ef9a673ddf541ba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a5fbcc42ef49d0e9d1380e41fc3929e05a6cdb4751e41aa7d4a737b1e04abe +size 42313 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91570ca828ad09d2ff97860147ca05d045e584cb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a88a293470daef571109d2b782763220709ead5a0a77ed5aba22d5f5f7a6598 +size 60935 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2aa6817181634f06a3246744430960342c3eea85 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2544ef5e4983f67d21eff0ee5b71fd78425b14df4e82314292e6294d8962555a +size 26997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efeee20e22652b4d43fcc30f41cd3d7a01ac3e24 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7376b40faf28bf60375bb48c9ad2b3f480c089c989f47b7f5a5dfb0da612528 +size 14605 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b96c805f144430d0fe576596869de40a07e580d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac04700bad02ec04b6206ce5257b95159a9a7da67c071790ff527434668c7b26 +size 44936 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40e203408915dfd2d07c042a16fab23eec6fc238 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:840c6b80bee2a2ad97bf74d0e5b293e1ac59ea909bb20b808f18ad25dacba9c0 +size 54116 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c07f7e6483350b44becb127f19a8345639fd1c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f02ae58c2e70544c4c1695d87a7c74958d873244c3cbb3edacb5db73383eee47 +size 74398 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a87011b363b7c07e76b57e91ac7f577ed15d538 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:337d41f8585fd9135bfa4baa17d4c1bed68157312e76dd7a5d8e61be4f036a4c +size 34935 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebbcded9e48b6539e01bbfd6e773d6d502cb6aaf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9fa58a56c6a4b33031dd9a5122a0b2bff89dd4bcb3e674f65268721bedd785 +size 39099 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..404c1e3cf4439a3e3616baa7a4e1347df10fd673 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e772dc1451d60bda3284915f34e42d358026a2c582a32b323876ddb3e90982 +size 83395 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_279/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2c3b77688a6542ca7d618894e3e738667357b49 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a275feb27024fec097b88aa207cdd6129ecd4e1b53d06ef979bc7050f80f1c7 +size 20957 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0456818bc01b288bd4cd56c2a15337c2131be06 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e322a8205d2b66d2c3b59f7301a198340ea5b8d272a8349bab6f2e520eb1574 +size 17684 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b2632daf66a992e2cb0283cce996afa2ae38345 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8db6dfaa5c49a7f86a177a360bb09e4cb35a209c022fe4299252bfb11d450d +size 74417 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75e8054d5532ae96f857d988d2de5dde3bf44398 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b8c1fbbae9058c15b3c907c3c2f61bf72dc41f68cf5601e42289bf7cfdf3ca +size 26995 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..455ace29c518eae2fc93d2718c1918dd2987bd9e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524b92b40d241a0de5a00d5bb3cbbbea9275679a48ad9ff1f6a66d3e782fa17a +size 45288 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dadd56d4040302a6679d4cf82109cc1b80dbecd6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd29ba90d68ece56705339148ae2449e0aebb1eca2f58706999e7722ec8cc48 +size 43960 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5f3fc6241302e043fc873e4e462161f2feeaaa4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb724eef0b33bcc2fb55fb0bf29fb22e80bf75c5ddbfd7a60eaabea0e9ea778 +size 64086 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c761fd8c1395a57316c91525dc3e695bfb25c2a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb49a08daa2212e97a3078c236dccb1f22ea3ab44f4f0d56d2157f18e75df36 +size 74643 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbd7aa31de3c3384205386f117180cb78d946436 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e086775f567f488df9beda91c32346cbcfbf09c236944b3b2be31176439aafb +size 36770 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a021e008da31efa326be8be48eb10105d131b0fe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12eb89c2aa0d776788e8da291e4a1b369af5612acf7f7c5d6d4b8a25fa84f414 +size 48210 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a33d275ef8ebc9353ef3b6f9f0fda2ec5d718914 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5caf11ab7160438625c3e3ca1c0fd2edc582e49c67690ccec6ed7502d2fbda5e +size 65944 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6e7c1b6229e24fd6f3bb665127ee2d4f4d987ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:981eaf08aee3953433fef34f2dc607b67206120dc9a7499db3f12ed26be5a49e +size 33291 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..222495db5e014a776e8bace32689e8c3c1e36208 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7728fa3c8204e089f55cbe77ad973d2218bce4abb3e3ece4bd2eb00a124889c +size 35322 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a71a80d37ebc5bb91f2184d09b242b3556a53fe3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5fb56bbd7bd853357c13713bc4f13c4cbd2a43355cca3b59254ceff9808c72 +size 34510 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9003407eee1ad3cecbf0f4d017fe5c7f42c6fc1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed02570686069a89fbe8aaa450779e63b936c0a78a9e5d6d2a21d6af494cfae +size 19117 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be8c867ffcb50eec2b29409aff481124794a7879 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f7df6a27436935664b1c8bbb31db10f1cbc932ea11edce369c15b84993a974b +size 20720 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6c4fcff30cbce389b2305e6751186237a7e5b22 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4954f96b0ef9dfd21f975c699a5ad9de90d3dbab8346a93ecbcae3656682b0a2 +size 13411 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af273498e7859f46d2f04a58cfad211f505e4a44 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899dbe6978030b3b5244687dae304b045afe81be0ce6001d0a58fb8927e4e7e8 +size 33432 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37701ed1d449343dbf4d280f55a7298456e3e204 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033d6ac240cd2a1c75202be70b935d02d84958aa689b8cbe6fe3ef5afd2d4868 +size 43158 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b33933b268d61c4d155a2b5f6da046880adb6ae4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ab9d3608a21d0658d6cf3dc06eff6e5a18ac9c363a335ceb9023814480463c9 +size 59816 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb3e264b4c95e0808baef8179146ab02674cc436 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95659abf08b1c528747e330ddf566767c8b62405d4bd6693102af2dce49d7f9a +size 27435 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..314b12c2448cb109339067a95756a09ab91ad053 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc7120fe1239aed9b836f7cd62455255ee20796425e34a98d82a4e1fabb41d0 +size 14670 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..630074c49fb90a6b2e0524b1cab699686c60f303 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:692fac47f255835bf19ed133590de5c9ad1408fcca77fd8d42a51f1c288357b2 +size 44926 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ddea7ac0d5a40db6da9957fbb4cd53a70124e3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eab25c9c900d9238147a1bf8ecdf8cf563d8f4f3a6dc2180342bd87a906afd2 +size 54198 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84735c890f810a9ff9801809cc06256d4a14b8fb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae92274f736ce78fe87c98319b1797032411158841c71810f11e5cebc221a209 +size 74343 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92550b846690ca2a2d7195f2153ce354adcfa18c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b59c8b633ed6d25c26c92cec1a9d50c09b072e5d1f63bd062b40985f1eb1f2 +size 35018 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ff34ba9536d5781927fb3de90bdafa5b63b27b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3d1d918c3dad205b93c9d60bffaa628176781a876d9ea1bdac67aa6026d0b0 +size 39064 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e722bbffa1ab668e609658ff6224b9fffc644ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c580b5273f652b99d85530e2d93b0b3a48e2f34dd24285290f1cef11dcfe3554 +size 85837 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_282/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44e0ee5d41bc0ddd684d4fbf8a18638b7c493edc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab681acc31f178e866a0ca0ed7100df8268103b7b1065c016fbaa73f0fb8f64 +size 20930 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e35c33ab103b305104bb5029baeeed2132f8b668 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780f6175152468e5ab04d868c47fe0d53d2fe3a94cf770e57bc812c062e07b1f +size 16614 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12cb75bedbbfd06b5c92fcd485367f21a0818050 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f79faf5b418b9b241a7491fe7b02c3bb1e26b78e36c26c7c64e87a677b826a +size 74551 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b62635feb941b3baa833ece42920de18af118fbe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8098a2334781044a6ebb151a284aa0028c98213194055d49a145f035c57697 +size 26637 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac3165cea51a6b5b68f3188d7b013f6840613af4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1cb364d1b8d8ac05369981db2e63d4cc24e79a9375af073300864d2d5d3a4db +size 44574 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..614e77b083e87bfa7c4f10189966322bda9d472a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e95d2204377a678602a53663315554a8c360de6eadfbbc0bb998f7e5acbf6f +size 45043 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2aa9f646452a501499aa23a5ac0f5b9ad0b283b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a009f98c6053ab25d6c224a624c9b1928c6ca28ef33b1ca4d35d24bdd1dbd5 +size 63268 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c38c6f9c2336e69f005c00877640264b59b7914 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b156a4e4f8b4c4d8e660af6b0fb7013ad5f67ed8dca5033b2c146ceee31ed543 +size 74146 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e14be16b7a62b7aa6d9adfb13578b57e107a4f13 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dc4aa11ca56e547688dca74fd499fb2561b29a2a0fe221286fb51a457b2d74 +size 36508 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00b9bfa5dda6def44d0273bb45910d5849130048 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c9742ea85565dd2b5d8b4c259858f6c21cf7de7300a71f74c2a81915e050a3 +size 48021 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ff28779dc39c9e54ea88316cd9679c340734666 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a0e648592de1a2f365cd5e28abaea6098e423cd6f60afec8e03948e66d813e +size 64821 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb56547a0fd0cd522a531f0224dba9c5be1be81e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa110a4f838b3df1952ca9bed3a098e365448c059fedfc0b40bfce519c25c545 +size 33346 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d23ee548907625d89e347721896dad99a29a3ee5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad34da830e7526c9b4895bae897b0c2bec857023d480456c642dbdc77585ae0 +size 34603 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ebb4280a1485199383f471265cd197df7b1896b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd76028a92403edd075bc0f0a0ca5409ecb034caf8a67dc1f51bff7316ba71d6 +size 34887 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2953786a05a707b99d7b1109e06741d875fdc33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1deea5320b99cc2566ce900a7a6b02a1e1f5105307d72b55c23bb4995af7b5f9 +size 19409 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12233479e96ecbee4ac13abe3d0045d1a61e2326 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce3d003f463883614752ed8a5410481b6e9acf30b79e54324e64f159965444c +size 20747 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1f066c497be40061dd7a3327ae4ede5be7d078b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:278f7a30a1faeda93670331e43bb7a0a8c0de1648451bf386c235a0b5dba334d +size 13591 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57cf1ae346e1848c51b1514cf562356cd9a2d26e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5513711f31cf7f6277b146e6f38ed289abeedb3125aedfb163b6586cb4cc22e +size 33343 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b26c1a36c1bbc1b296c92d36d5ffc54e0a03ceaa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:477edb6fe80250b8a39c45ee96e81f01a65381bf3892a9e9c30887d6253e3418 +size 42999 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cde9ce7aae4304cd7973d98c76ad60ca72accdfc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:314d650f57eb4c896bd9c68556f76a4f413ad260c1c5c35e3f98c6b43b213e6e +size 59451 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb99a10f26f80a5e914facb02cb90c9d51616eaf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a675a58fc62f548809550f1029a6ca2340fd7c558bae97ddfc6e22c82e77191 +size 27551 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ca761b8d0c98aa0e6e8af784452719aebeaab17 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0b71a6ab735b146746266e444b2d02a31a026df557ca0cc46845e292c85532 +size 14727 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a90d0441b59bb888729c1d74b714222f1c78ea80 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66618d939829b6b537e25688e2b7cd55b89a7f4727aecb208737e5a53b2e35e2 +size 44953 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94eebf58cb5dcb84cb3067af35db72327fff7bb9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e893ab1cdd205d6fd1b5915369d5a69ef25dfad24aa4eac42507a927fcdb81 +size 54210 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7a2972b9a985c0a174e2d6c986f49e774753fb7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0bbf9630e1557af90e98b078545012a10cbfbdbbd83c22e39dda2f47cb9ae3 +size 74402 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..620a94a57f65e8bac6f570421fb332ea606add3c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f16722a43084e3819d5745efdc59eca107ab796bb79a465c370616bf83a124b +size 34846 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aec0516fcb4518ab97c1ffd496f9f07f69cff019 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c003fcbd709a411ea4dfb23aaa7b569f9d710547146d5b769331ec7db2b320e +size 39090 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84f0655d8baeb1bab0bedb5dfe9bd98361898007 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5aaee5b6a946952e91f748c54a0556bdfeb1fc636e814e10162b563d8c7ff7 +size 87029 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_285/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06c5d0b9ab88df9b996d82ac0c1b1e7988b22962 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a517c64df827afc297d285cab5cedd81f731c202d79fba34690b5c3849d80da +size 21010 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e3b36eb0d1193c724f836e9a2a502abf1076184 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3a79322a6ef9370cd78cd529d71bf2f9ba6c87ddc7812bc60cff45a06b300a +size 16665 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30458e7aa63d394ca80909045ff48a68476df776 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192a5dbc44b4db150ca71030582a50b5a78b900703c97c063038518c1ee36cf0 +size 74832 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9985d4c4a0a7f4d01826885b3e31e720baf05251 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec00dc084f17789775c224645d97875383e94c7d69daf6185fcda9c74a948b71 +size 26789 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66c8152f04bad64d71af48f6c07b5921865ed306 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872bf506342165bc3ecc0b6e011c49f59d60b4808a43721a1ffd0cbfc183f684 +size 44886 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8d8c857781ce6c093eead0665102e9aa4a720b7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f4e87eed9a6fd94f0890d571d5e34b930f0514bbb0ec15ec263a3d70cd6c14a +size 45123 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..142aeb5eb9effeda2f457318f622fb3000607806 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e5c85b96d73bc906873eb4f6537a0f1d6d762e14f7c45754ae22896a1936c68 +size 64205 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20caf4e0c38ba323c6fc6dd7100e64754296966f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a9a2f3bfc749b7ccf55354c7f1dcb12d56819ba3ad55d9388e37edb11577891 +size 84838 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e819be55c00b8a438aa96dc8dba983c3ea9f68b0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f7943d18885a6827568561901eb8c278e92fef14234001fc30b25eb11f0dc4 +size 36497 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52accf650edee49ac1fcf83aa4ad765478aa9ecd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:407acf6923627672cb211b3bef914353a186bda225d90a3a6df860a92c731ff2 +size 48414 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6916533c07a37d75fdeb6f34eb238ed703ea31b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20df8b8cdcfdc47f09b8e5b817997e3457025836c2ec273f648dc64a884a44af +size 69182 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e67db972454f7fc3b16a0e7275068b545f7c59d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e999f4b29e1481278c7738a72b9bd556c03b70f47d5000cb34d606593b57898d +size 33182 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ea928b7f915e0c1cfdbcd809cd44fada30ae969 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d1925de8c7e5ab94f535272bb5a1c0f0c2bde1996d8e78c7778f93ab23ad824 +size 35018 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0b15535fa431438cd3acb57cd5efb327bc38969 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26a0b50c0d4336d53cfd33b8c8d38ef4c9477a766f270fc383e3a9fd3d3c62f +size 34297 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edb05ec9b5329d54ea606852cfff53e29ff57e61 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb835f8aa077d040aa97a6de685e1cb94092ed739875a1472fc4915fc581ba3 +size 18942 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1b2c93ff55135c9a2ddef53c6fd70b71da0c488 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ac5d38074a6c9894da6c202abbc2627c54640771935ca9f6e7d777c943ad726 +size 20814 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2f42a3d1055b1ce3d9e34c4c4d2af5a6692e4e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a69748b4bc304455ce54576e60c5464d95d019f20977bcaff36ad868e03d26e +size 13355 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac4d379855637a5eb1fcbe6b460c5853b9d19cf6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb72061b485ac21adb000b87910f52add110b801b1b96528db98adfee5e7ac0 +size 33809 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f716f82aa4accd0b94183e7b1b8f146b39409e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f91052c47dae1b0a111da7dabcb089cda3f3ea3f3539955827bd92841673668c +size 42756 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e354ebbaa5ac2f0726283ac68361de07b97b244 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11f64d5f291109e896314927850e78de0e0bb9693c8179a12f596b6bb2b740cc +size 59571 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fe5f17a5546c0a3ea2f9eef995f550668c99378 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed0d3f7876bd48e41e5c74e5f31e5479ad1481ebc6987c7d8d0d268004d8f32 +size 27306 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a21594d063459a09e93172f5df620c96c1dd582f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323d44283db65ffa9b0c7891a5d29497d1afa1ea0b61688f03e49b81bf278895 +size 14699 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f01cf932350e74cf31cf209428cc8c249afc709 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2647fd879b5575bb11a9d1bb520e8c5f942f108625a4e242f8ca32da2ed90d8 +size 45012 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2a05645f8d9080e15868d09832536322d00abdb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f900fd3c50111675fcd3d1b58165c255f335e53b1579369e0035db0b39f39e +size 54275 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ae398ae4c4d2c86804ad3ded9a43002b7b33e1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df42c462c7b3528b43a68e014e8ae610123507e092d6e09bf856c18b4a66697b +size 74519 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c17da87886eeff3ba1f714fcb65871790f95f38 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb5b89d188a734fc54e3f8d12861cd7a1ebfce3afb3c8423bed5f9175ba9da89 +size 34946 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fc0fa0894e36060afaac0231259a09df9a6d49d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8c2c1d98f39f7d9cca99b66ba490c3428f0d227721e9176a02c9ca35f7c238 +size 39096 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..352062bc13a2b8a7551e56c67d2989e286f382c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b6c2d0187859e9f55b2ab0c688d82029d9b19cb1aa57848c2d80e24ff713dc +size 90149 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_288/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31ada625455bb99fc225b880973120015a4a060b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0451baf5d0ca524aac0e8bbe7d17779c6d921e5084ba9cc818dec93ca33ef389 +size 20984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a6097b02086328cfc009eda4c89d3a15e3039ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be85d237feb6eb6ce2e47bf28c99e416c97139ec034cc489de509b76627e6074 +size 16611 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0039cfeb9b38fccc7ad9d8de8505377b242e4293 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:857548e1ff3402455890cac474916cde9c06bbce1ed028b571832d61efdeca0a +size 74693 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4715c6f37870ada3f79659890506e152aa2d88ce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b009af5d4e55f2bb022118810efaa088aa9fabd715cc6fe2b9f8ab58d67f0c +size 26733 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44a728656d748d0ecfa072b21e7b7009fa8577da --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5143b9f85cfac74044b4c7416ea521139543522943bd71bac5f1e5de5f2c746d +size 43531 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..686879f53ca853934556faa97d6c157656e04407 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:779cb08b1a37263a7c6e96d9ea5e9e5ed86aac0463043fa8b7e1c20a3d44c353 +size 43853 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccdcee08b3c452715c0d8f87cdc6076e5d1779c8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef73fc87af95c13137ff8abfae2605ecdd73e5b92fa5c20cf73ebee7f14c32c5 +size 63932 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1ff0640d8cf03bd2d8860670f28bb1902a77db0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935799cb3f3d5b7261e6855e1c70c46b0c6b26b9a898deb56be990568d69fc5f +size 73097 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c10806f2076d6a8573487a2f8a4a29527e53c8cb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f7a599ef8efe47f348aca5fc3c7200cc12aeeb741dac13b0b01c58534c4233 +size 37192 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c958051b068b06f408a482086d982eb7d5c8f26a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47394f48472bd5212b7fa6b2d1fb4c33322d5fbcd8c5e068ef5f7d3443dd1627 +size 49225 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7902fab148f852dfddb43a6f73fd21916e6c097e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a9046f2ce259ca941e82e25f6af8975047ebda3b228c777cbfa9ea88d47ec93 +size 69229 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfb7426dea25eb0d950dff61a1dc4a2229470eba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7750a681a461e67f310496d289253a34fee7a61af18f4f8daa20c857cf61be07 +size 33340 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbb30b2b91299c7fc992701fa8f114650146e3bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed7d4b62d430a121bb4fc700bd50aa13933f2ed9eb901b0459b9c898d17482f +size 34737 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b82c14f3f9aae91bf8e453aa14c1e946b9c1afc7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e6bb26c3510cd887c4a8a6e830c13fc45503c776fff0d54f551d641783e332 +size 34593 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ee70df4583fbe67dc6a38056b803582cb6130c6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918e0dc54650b44d7a320f89f6a6b57c69e4b320969f6e2450881338dd20e2ab +size 18998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0d29c0cebd26eba85a43a576bfe9827ab4dfcf7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3724662cd0b57392a569c940b052176e29b5f2195598138138a9e64cc0e98280 +size 20796 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c395809e9d359f43de8aa77412de3206116333c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05c8e4957f1b2c2afc42c3e254d081ed6b6f39eae34a86bdf28b35410ce7acb +size 13486 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae1660915b9cdddccffdde1b005f38da08e89f5a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9990b85b0abccb03b4270e5b95546998e4a9e2ff99b27f574d8348efabd2ad +size 32958 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd298bc3d17476389781a93730a108a25895cca2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b0a554ba7d730bc2cc95b4fee7f85d0596bc6b952c26f8daf170776cd980e5 +size 42717 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30c385357e133d765b9b0bbd35383de960923b4f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:569dc6c2be46abdc7e9976fe4de3f332eca46fd904652389f526fd92450c6fcb +size 60668 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..237c89b27071b0879fd94a0d14ce1bfd7396c88c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb69285841adf2ba8e9e3070fea1633f1187627639cf4513a7de707cc5f0b5c +size 27266 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7772c3faf062cd566012249c4321e40444fb6d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f92eb1592a42b103c8dd6846880959c85cb100f059174183500907d6850caa +size 14634 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53c22e06af115d61aa88cc5e78870ec8b10d5c3d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6bf00e26e5336af1acf10d186402343733ac40fc2f79b7eb0e60123563150e1 +size 44994 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b95e6db92c9bcfbc950c77834bfad06c4ae52d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0025834e64db335c9219f870de457a5e9c910d0ca50924c4a3b9e879713f7a4c +size 54222 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12f8c04ca1f7102d0fcd0129da130dec5920bbeb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440b54fe923bc18edf3572b27e28194cc869e8e4a9e121b2fd54b3b7b2e0b8ff +size 74301 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..340852cea0427adb5f8210075c87c6c40039c85b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3be1cfa93c0fb685c0169008206207e4fcbb3442357038dbdae8dd14adaa397 +size 34946 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e26f8e447a678e51af57e55f693e39e92b4aa321 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4ce872d44053a24d40f03041522945b0fbee7d3264555f63cfc65444ce8821 +size 39084 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3909a7a00b4a570fc1b3a1d2205d57acbb0d806e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa31fbb090e7f61f453e83f995164fbc1c4738b95ca5273b7861a62bca81fc1 +size 87376 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_291/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..762e4447f56661e7cfd704a8c9f652054a246a7a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:163aca63eae023eb2a3413723c5641c24603603817867532569058cef27b9dbd +size 20951 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a545b31f45f969b825db7de6be4ea408aa999be4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30d195ec54fa883e57293d0ddb3cc608df5043fa728611c0e3b6e6c249bd0dc +size 16779 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b12c3a52e574157d955ded89634cb771c62df8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6dd5f475152eba763b318d36b26ebc47e5dbb5fc7d334580bffc6bf6f91b545 +size 74770 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08f48eaa6f491b4106fb7136f560d847a75f3d89 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d44035321ba392059a83f66625931fad232a35113ea84f6e90e1b0962450b8b +size 26684 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9314ef53552ca868a7842f15dcfa201739c9a23d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:942e1ae468e5c009bd2455870d67ad5d8f0265e019b7102660ccbfa4cc410998 +size 45858 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9cd0e97be82d2319bc77dfa65915bba4fbe3d7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c178ef7f71f3ff0330d66d40a4d8aadd3490264867f45cccf62f8afd88e9e93 +size 43537 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..262de928d76072bd9a142d61899e0dcda0c55e29 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d0a902f513ab99f13f58da2902d2c8f6022c689df9adae7117fd7fda8ac860 +size 64330 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49292966e8c2d5cbf9ed97b540409ba52bd6ec3f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5710066900f2fc28dd3b5e1a5a1a39afe1c9877729c6b6e1b816068a5dec8d34 +size 76053 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a12b34923c2bd14e2b242fae24670354a9b2044 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7494d3712e2db62d85a902da1a8246b838a953a6ce997f90078bcbef34587f73 +size 36800 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47da9165db071a554329489848d0552528af7b9b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc8b7fecab3bc109390f03314a5b5b5b3cbce99b4680a1f402969cbb98f2d018 +size 48267 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..023e2112b9f24728319dcc3ab68942bfedf6dba4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d2103012fa3ada856bf4b1fe7931ef259d1cf773e28769d0016151880ba744d +size 65616 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f160ffc19e35e5eb4d5912cca16f62088571daa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ef6b63ac6510c87890e5fec54172b3f5913cdaad3dbaab428bc7a4ce4b77231 +size 33254 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47459a277373d3d67e2078c869aefe31344d3d96 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ca96d858d1dc4ffc719f474f1610446f869bef69d33bf0e51a85bd7197a13 +size 34961 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9300646bf0d984c2b75136ce498103a79208edc3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc62af1d435c4bbe99542c174d88881d6b4c104c282bdc9d8eebacf4ead25210 +size 34685 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a729af2caadcb4e1e5cd6ba895e5e3b6590f64ee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1800d2499e03fc517054a054378ae2498cd1aa4b713ddd6032c9b931902831 +size 19087 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbd21abe953e44e46e599a22c4c8a2cfb775b9e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1258e1073f49afb69ceea7237ef8453d7b9048df9a207f568ec826c24dbf60 +size 20874 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ff6ac130294400970403a6fe2f6733215341c16 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50fbd9330ad1c624354a7a06cdad765b391e9417dbdddea97ee8ac91d9af6cbe +size 13485 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c0fb570c2716a1e0e14b4c13e4d08865b2cc0d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dae154ad10ed34e97c7652a19ada649af8f614fffd2197699c5d16afbe8cecd +size 33666 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b2fc6e46b4774e697b5b9d0bae38cc5ac2a8eb0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26280a1f7947094f3fd5b5ab16ba19d9a58351d1319a790e125689d550cc7d75 +size 42818 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a16d53b732fc84f7a2a763952a81a96d457801cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a069d8faa556a393656766ecee23197cca40b152788b10dd588b0b9ea558d9 +size 60064 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eeddcae2bee79d6570d57a29b210ee1fbae36c52 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c51ee2875a112e974519963078002fe03ae668800895fb747ec9920dc99dfd9 +size 26821 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c3f22f53b7dc1bf55e2cc9d85e00f8210626329 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61edf73ed7f25d149bb8bbc2908567e1b14a1ca15a5078337c0adfb193fbcb58 +size 14654 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aafe228784307804ecc9b7d94dace1c5431cc99c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab9547cce969620ed5f797585cef3bc2b84d3ae5563466a5542f85ff595cde63 +size 45252 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b20a3c73dd6093e4893c184620dace2021fc6837 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10429bb729bc4242d1753fcce9f16f8970c1e544f6ffdd5a6f9005667fd9f04a +size 54190 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2d69c8ffd6e6ff39c8cfd1097355ed04e846ace --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:730dd8c59729c35b9bc9477399dc1dc703e6349ada66c9ab84b1d4f741a66ba4 +size 74439 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2697d5bbac91c7f0c2df05e58be753a4af268994 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c8cce5dff233bdb9462fb0f90b64cd92ce7476db0803a13f7006d264f9888e5 +size 34886 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f49f8a976470678c8646adc9bb2190768c843f48 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57693adf1f3c5eacbb5fc0bec65cebbf22f00dde36d700044513d724d882d693 +size 39061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6169f9067f9b9c5c8efb0f71b9fa9d8eb826cdea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968f4a239e0c413e8ed0e7f8a02e8e145c451f255bf2aa91a96230417afd41c7 +size 87484 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_294/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..538f2e31534101b61970a72f972bccbb7bdec873 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d7259d85fcd5ab942dd66e499ced81b7f5e6146f8326d704e42a1417f2a47bd +size 20991 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..591ce9c1de8ddad65d26ce077878ee061dabb99b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378ea87cf9aec6a4433842dbfb83691305d9086c068bb6dc7b148a3b2cdfb148 +size 16619 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1be54144cb7df07f6de3198eda732b4b1f2ca170 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e4e037e4f5307659b0be512a0098adb60953cbc622a3fd982bf1de0618f315a +size 74732 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06bf9c62a22790646c8a3ecf8be693e559b03862 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7b509541b68c0c8c4648e7ba3de4fa592dbb028354a7a456c37d14f295666e +size 26692 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7db894f97c9ce121743edda68ca027e182a6e98e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8deb934d48742a0b4b33616e708ff9c42488334558d40ef1c1e93373cd9b64f1 +size 44156 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b39e937af7a94b4ceecdbb1f860897dd6dcb9ea0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708eba8debf22e6177091503b1e13574d605b40ce8563dd5ef0200f7ddd29da6 +size 43896 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3876784d77bbd36a42877ca176b28b43257ff17 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b53344d003c65a72bb5f063cfcd629e0c1d7dc30b34cce7182dbaf689945e78e +size 64134 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05a7c2d4ef23533c122cf63e9e6a8da499c6cbed --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12689ed7ecffbf75e90a50c2ef45ab6bcc452943bd8480c63ea69d79ccb5481b +size 77868 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3bf3367cf79557354ac5a9bfde9db83a92b36de --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6d0216e331caf7bbde6db2be2dafa5a1592a77fd70510f2ac477319eb96ec7 +size 36877 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0eae4ce4ac9b98600b55eb0762158590f5a7f6b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:823d4f8de40bcbc61035360f18d63377fc8e4e5644bf3639301b53da83ca0689 +size 48211 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b36060e4af5676676c3afb5377c161ec63e01e51 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e516aa2c0949df1396bbab71714f0e3f625b433d4af4063abcc06cc6d6cbbd8 +size 65600 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e864f2596abc3d7d14daf6e2b62e9f0b0f16450 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef7571efa4a73d9b2e9a9267a579f89bc051a57c5abc1a8eee62098810a19135 +size 33259 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cc478f794ede7b4da9d9929edd4e1ed8d55180c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef89a82471929ae6b72ba0dfe40bcd735a4a427fc735bed0b66a31215604f46b +size 35381 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63f3ed5f9039a6429dda600db22dece27478a3c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19b8b9749cb2e53a4d58cd62c6cfb66f2192beb3c6ad0d9bfbe40e3e082291b +size 33862 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da24d914a83b538d9ecda9e994fbb3a2d38084cb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d255094eae532d6b2dbfcecdf045659b6210278ee8ca4514b3ae38a3ebee0da2 +size 18985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7cf641ece5c2a6f416ef7554cb5519defa9f1c0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e5b9e037cc47568da7f9937f8fd2cb5ae5ed54ee24286695ac2d2c185b0811 +size 20742 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..746a02e9a1f871f0b9fee426578e62c98e174dba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38456f383366b7b68fce67f00ccd98019d60c35f8ff6ce3f1c9a0201035d0e15 +size 13513 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39e9d545cd6c4828eae4f00656f7393153e262f5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:840e652ae8c75d65961f79cc38c2ea6deaeff107ead4d551206007c61620e08e +size 31958 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b76a0aaed4b58a8b9743a45c41fcebc5f40f23a9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a333791c22ad6776dc1924f84ec0d28c67dcd2eee9522be04f8cc69a7cbe4810 +size 42064 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adbcc22d8bb0644a250e986c3aede1558e883383 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc725aa4c32de147c79ce60d86f9a4069c0639c1d70f48f40133adb5607f17ab +size 59201 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8ddb1a44cad591fc10f9800cc5206b7b181a917 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea55922d716193d89c160d833ceb2612774ef089aa8fd266cc36066c24d6f13 +size 27534 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9983a0370d4147ab8293ef6b8c1574549c7f3de4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a14c3faa1b40debcbd658f681bf9103b11eda0fc25249a2095e0421a8473c97 +size 14623 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..281c340630ea89c2c5fcb6ee4ab6eeecd36f92a7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca015817de3965b563291a18ff62d91a3d54fa1004772417bdab9d30e2c2f4e3 +size 45159 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c19da2783bff8b5ebc5cd051119ad6391deaf9db --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644a3b1f1d3fc9d23976f07b86fbaa65c5d6f5febfaaf3e3f24b9f4ac11c1447 +size 54142 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c24078f636c7c5cd8afd04ef5c46b92444bf6dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67121097d9dc1c01c1932955ce94396bb29e0496d88546d99b851af5d306926 +size 74274 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..938a7541b36d15e64816d42e80349d4852cc6f41 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18b7edc687001466380eac6f111df17bb2339182a161f4ee8e7935fb096580b4 +size 34884 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5755856694c265975af1694a0fb63ea13ba35047 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b56a53e03142e256a932bea5045d0c779c34e557d0a1be79ca479213dc651dcd +size 39060 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f34e19c76b3e653af68dc43d5d4ad88d11001c8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f85ce27cebc3b5800fec658d7d5ad908389f9fad7f6d8f3810ea38c85e3dadb +size 88445 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_297/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e7f47a59f57a536b4d5bb1b2872ae49d2003854 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1869781c7251f65af15b2eaab95019dbaeeb8d874075505cf23bd37cf84bfeb2 +size 20949 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fbaad81bf5da0f3a4c3187ed1827dfa47eae620 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05a899dacc982d8de4a0389265b9ef70c5a79dd60fc954ae1aeb1f0a5a2f014 +size 16092 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b39eef448c6825bbbf4248b214ea33ecd07f87e6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:901dcb69683b9113ddd83045efac724c2a431f6a1fbd895b307b6f89cda7aaef +size 74674 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0286e5278c942637ba60e2a7e6e23b5f5874db93 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34eadd2bc277316a0482f99a24f49a8bf28255a99b77d05ce12aef1051541c8a +size 26727 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76809896b64229c15e37762e3847d52fbf9c41c4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925d0deb40df41c8517fff73c79d301780134d84784e13d04008db3592dcaf82 +size 43948 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..631fe098708a01083a3055dc6befe2acc6abd1cb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44acddddd9cff23c20d1ab7ce2f4cfce6c75d40ac16d1e2ada4c7303cdd2de99 +size 43984 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0866a23c658ad0a5bff5b6fc004a5541ea9af591 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1d7ce0eec093a3969475c2e5f4a4b51e5cca08ad799de9e7a3d35ee1f6791b +size 64632 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5cee290d91dc332dc68987510a087267f25c7d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30493bf42a14012dfa2395c2d0940d7162c0edd627c347a769051af2ea2cbd0f +size 77028 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38cb3b423c2797e436b5506a5db4fd4bfdec2858 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d41869188e79a83d1568522e5f66a0927cc70694ff0c46bbc31060a06301772 +size 36531 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4d8c506e6911e9ba65b5b9df8e15217fc2a09ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a17bb26085dd701af9baa21ab81e5fe1038e18c2aa23a901afe7d3075e5efc +size 48366 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09e99fd038ee60711d67b81c6bac1dd149268e6d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07758579ebf33637548452270a9c58674b014e1023d1a62b1e3864476e91d26e +size 69893 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de499594008bed7074ed919ce545ddf8b3b61d01 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85516dd7049000f89e28c82c50441e532a85d9562fa3a9a09d5321e1a0ea1b9d +size 33490 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9192c533cb077ee9314f65bec134cf4bada17001 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091355f3d2bc89e1ded49cafb2ddecf3543a64b3bfc2264e37e3826431aa7c5b +size 35406 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..236daf23cb19e8b3ba2ad564e03667d36de3d8a6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c2594ca01cc54923fa9c0b51eef85fdde73ba671f9d21af832c4fb1cdf2babe +size 33404 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7caa6e3f786ed328291dff22e0c373b03d83c815 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c8665508e59244f39f56e380d027a0f93260697cbd9a418f5d19ffe5e3db19 +size 18837 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d74d583f0670155bb814d6e4c49ca7aa4bebc0ac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc14ced26208ae84fd783c92b126397c78d06c21e4f1ae05e2220cc015a7900 +size 20813 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4fad03803f2c724eb5cd3985b8635fa946883ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f657dbe42717e28d141a7989894483961dae30a3a03867f8a3962b201d60dc79 +size 13376 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..863598a3fe2ee64e92dfa5a9717694d6dd75b763 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6a7b7cdaaa25c014db0388b59dd0cefe51abc40965aa2705816764328ed1540 +size 33107 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a49ba2e58df62ea971b0e652a185bc822def1998 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ec53e1965e23f26c9be4588319e4feca148270f284f20cce62b11dddd6a7f6 +size 43092 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c036922789f03295b07dd675aac0b7697c3c74cc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39eded28a984b0cba2db0321a39eb2a390d16331b48b828db7f7ae4c9bf8a9fe +size 59203 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72e52410ab50cd00a9cc3acd56be074b65755847 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6307a244a22f6eae92523005057a6427ab36950909ed88886547a7eb2886d64f +size 27488 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3717216c1668977c73b0437529ad63f4dac57ae3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d78f2e650a70fdc9b23e0cddac19fa51f03f84bb3d3dc637f07e37387892306a +size 14697 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2761e0215383bd4501e91779d444cd627f642b33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56412164e74a14ce860997e85d4570be804731d2d85127bf3a2a409a8e8f81c5 +size 45320 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..520f591f1ad74546b687722f1a52767b5c9d4fc2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a67930507cd7062dfc83510c30c25c37539173cbfa421993caa8210746d9914 +size 54259 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88e5bfb3e15a18ba329db664264b40248ae9bee5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3085b8df6e04ca7eb37fdc7ebb7582848c02bd9c010259526639dc32bb86a591 +size 74419 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7c4c24fcf498ffc546331f89b529d5020465967 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380d76bc7d356354dbc7be2fdd8caa0d4d8948956faace4ef246701383d0da0d +size 34951 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8875e49e4b01b480f0b29de9f5e30d4ea7baa2f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7ebd3ddc4890a3aee45b772350858cbbb4a05c17e2305b8f16bebe575c6b9d +size 39062 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0602167e898eb5088272123d20901c006801987f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d041fa7f708d0b9755526a10365082f0a1cfcbbe0adfc0860c6141c31d3b2d +size 85775 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_300/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f82244585a3d1d5183d68db7991ff82853de5e6f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3489310255d25d2d266b4236c33cc9031bb72c5a2e015329c70a892ffeef63 +size 20988 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b92c817734d5364f35c4311f248e45fc2c701962 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5188185ce172783ec9e35b395c3dce9c0519c2838fbebb7cd720440dbdca3b40 +size 16409 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a60acade9c885815ca7387e878f59e26399a793 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43136e8124dfb76e41bc777c2787b0699074fb8e1bc0489596ff053f8cdaa78d +size 74789 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..366dc9934547bac5804223db7a032bcad90fe7eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a8ca8b0ecf7e7af3b00c7be084dd1f351023fe80c5b8a18fb8c6023dce7e21 +size 26750 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c41bfb29344563bf85db21479985a628073b323 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47b32f7a413e09e1bbe5efde7acac0913bfdd93dfe2273c6d0f58e87526c949a +size 45156 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..950fdb2884e3655cabc408f02586ad3599801d58 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea4f62f41209e2d97bef01935842f994d1dc2e12da9090357c445e0fb76968c +size 41354 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6800443163252418e3f3ec18fb8b3535d5d978a2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c512ea3cb29a3869bad1902d36fa3c81764aed1a93ec24daf073cd0e7e99b5 +size 63442 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc780e62df7557de642e5c1ac3edb266d7279030 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e34b6e1c69f88f10e6752a6e0c1850c6c884281935da35822022263d8e65a9 +size 77443 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c375247c3bb308fa4f078d6214e5f58975e48f6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e8bf83dfd84b43b7b4cc83a581203a7bf912947dee05db37beb0b4ef540b6a +size 36749 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b52912b3f5ed33f835c7ed908eccd2cf69cd457d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b0b2abfd812a6503a9f873ad174043dadc4bb691b31fba3fd3eb9722bf6551 +size 47529 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..844fedef4252748a7204a5d1eb34d1b26becf787 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33287d6494c1d080157377c9cf7e62103d880e0d32ce13c987f823b11fbe8c23 +size 64240 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3667c5bc2862d1bc7c98bb6271c669d024e7516d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:722ffedaa8e24607d0c98fdb5d87125e90bb1c6a2570cea2019d109ad884dc8a +size 32977 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1112c86ddea9027ba50f5473e9f7cc62f5600c3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eaa35d3be153202dc8c501b50bf937deec291eed43648f2102e72ed96d27f36 +size 35043 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbb1fcfd9071d5f76d2dd5e39fe06cf0fa2ff25a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a96ec077c1a5226ca3b8f809495aff69c1254dfed57280c424027339e98521f +size 34179 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2eee61adedd5341aa0bc0bf1850a23598c20cbd2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff948126ca7f8ba97c2cbf456745e8f66abea1091b6bf77452afd388e5776144 +size 18908 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4d2e92cae0237ddc011727b7f525a75479b5a92 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e8155adde0053126b95469e8bbdc3e5111e46953186a08d9fe78f3307e0586 +size 20852 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db7ed4c987a3256ed3a04caa7929d49274e99e94 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd48a1f98404745833c2f3eda486c1ecf6c1fc09e7e9c44e3ca66456b7787525 +size 13420 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c899e3c6feb98cdcb38242a0883dddfe70ad54d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6453c2fab88547c2ff2c8490b661f48793fa92a4c69300777099cf0851d921d +size 32026 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d12ba78f28584888973592ee3770eb0538f47670 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ae51e9dd1938bfdd6164385ccfd92cf34273647647843047aa9b5d4ba41e8c +size 42345 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ddbacce3266d45eb2137f77afb736716816eb84 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0855d22174d49db496487a2b192b59d900af30273150f9a1ad7656399e064e8 +size 59667 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba9d556e750e38d717ab865203e179e56190f939 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890f4b2e020c42b3ff04571dfcbbc60915bfb0bb4b95034c919231e9f171cb7b +size 27978 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df6138ff106aa4c3765345e82dd285562b2f6ef5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64d497da1548cf044a84c37a69e3eeff36497a46b9ff38edc247cd0e738898f +size 14593 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87c577b00843b68e8fd58fce1cee268bdb9c2572 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d4294ac807ce9f1184680bf825d07cc034230fca984ce4b9a85e5143528ca5 +size 45120 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..296a6cd748d3b05208b6097f40ebd46f60548c3c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ee39630f27813cd7336dc9e6e5aeb237327d379e8ea85099fe92003844a0aa +size 54257 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dae05fa11056f9256688ace49c2bb5d477da96b1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede4da594253c442c1aadb1eea92b4e268f826daf78838b5af0c3de508dac742 +size 74443 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71f74b52e815d717ac5d8c10da28f329a05b84fc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c9483c10c2cc695b236b6b5ccd6b1c1488d3f61f19cdbf61953e6e3bdd4ede7 +size 34987 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb5d25af8ee3eff713d1e728f709ad72c7a1f5b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e04bd92c2ee4659648b11613dbbfa4bcc048275df8c84ed8912354af3cdc0eb0 +size 39109 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d10958fb221f8d64288f7505a94f5fa81f80fec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b15428ec8d74a5dac3e3f5982a69a85cf9707e19a60796e744be4c887994bb +size 86269 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_303/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bc760eb82879d702579d35e4a506fe1e4c90475 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f19a54b9bcccb0ea7cbf546471888fe9ccdddd47cc96b939b0529b79ead0e99 +size 20956 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa6f4af1e5574c811a3f0f650299d8a9e785dba3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa24b9aee7abb2ced721c59486fb85fc1872171566a586b4346e6352c6b10b8 +size 16683 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11ea6afc0949d153057181bea7bce2b7ab0f35b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ff6705374c853c264fd60024e11bf57f527de7f6c6e10a3f70c0ff13322842 +size 75133 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6364c187824735c1b547bd62052f136cfcdea270 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8cd9f2e9526cedaa501389c5f03b217c1e782d18d98f63e84becd19dc421aa +size 26625 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e8ec1b034812edfd0d32d20f3a8f614ddd52e32 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c874b9f8fa7865d30d552dbd7c77145a40e19953a0bc14cc7903fc84d8c2ff5 +size 45167 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18931d67f5bf27ad40655920f666ad7c9be92e21 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e7136995cc88f29f6b2b54662be8e09f6b775cf31937b2fb17545d8f0ac321b +size 40948 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..766768eaeb311570e9b770c82ccae52df4432396 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f957b3ad483be566fa3e055d6e6e879212475397d13e51f0dd742ae2596d16c6 +size 63324 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9f28344cc801e595884e730f97c8deda2d46034 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3631f3f8f0bc74dacfc3c17e06560f95db6012c3772797bfd76ef3d93d5b9801 +size 80891 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79276399e62c7d7acb1210b7b7a2a2c6ebf1f8ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3eb7014da1d603dfae4834a5b4da75116f4814289f53b381a51a094c9ececae +size 36435 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a1b38e555f6a3a522fa656fd86371b95421b862 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b1fa3bad8d62ed68648d5e23b73db99348ac060a70380d7a610311a27345e9e +size 47630 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13d6ba6a4293da2df3f1af804739584ff9947765 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5daeac038c404e07dc9ef5049fa30ef66792fe2de08666528a4a1c712bd351 +size 64233 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63512d7a400fc2389bb9f6eca076c5ad199854a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de879ffd7ba259524afa9822815a5290db04b2a0f0231a8dfd638af71d054ee6 +size 33048 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14fa61946e527aef40bf381c184a2b8ccc8d68c0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22a9d0806338e367d26c303820a2f43f7f63fc1d96403740bc814f900498059 +size 35333 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99c8569f27c0ee322b2d9de50a67f3fefebb258f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28b9767612c627bf783345e973b9d0f1048df258db5e1d1e816ec80b7cd9522 +size 33894 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37724c0022f0a89f99d3de5b1350279939eda11d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b545a87ff2284ec5cee8c22569f72996083a766d9bed25051ed59687f59cf5 +size 18913 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a57200c51b5dedefe2a3c33fb198a2333e6b16a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e01d40a0b0dc94326b544b583ddf5ef4a4e1092a65d32d3e9f64bf8e234d7a +size 20804 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7341eb24e9660f1bc99187ed31045d91eb7dbc9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acb328cb00b7106d136dd6f5e56b1c7cf09061ce0124fc6abb3f28a994f3c41e +size 13257 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa727bae57cbf69dbc5bcf3f880cf30e3b4f13eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18fd9432507b009cca80c0d1b55a8bfe5661be71edc3639ee200a28085cc58db +size 32672 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad737998f6c5ab2b65bc0bf99eb77f80ccbd70c0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fad48a1f2f573bf3f13125e28b44490991cf5cfee459ccbb147b50c02f72457 +size 42882 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ec8f9b9e4524923322a3a85276663e013c68225 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77eea764dddec3c4a8dc0ba173536da2c9bcbba9110e46d18a91c874801df4ae +size 59742 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36d1352623caddfe114e8e6533ce20aad8cad9ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb5747a440663090a5a2aa91d64de174a3cfc16a8b0448f48083b87d35c5f9a1 +size 26970 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b33021bf58bf5b7e75a81711facd71be21297e4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bbdeee627e55dfa1a1f369adb813e50dccf6da064ce46cb6430249905c967b7 +size 14535 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f96b81ea39c586d85412b79c023a8bd37af4a19 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448831a28b3b18693dddfeaa2ca999068000bf09691ade070a2281e7b2f1509a +size 45138 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe3f8f62d97026b8562815f53e12e818eeb2accb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746226917ca5e04c17c6aa17a84a67e9b7d37ed55a53c3af4f37c31c019e4a1b +size 54255 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f150405381e8973bce765a0eae59eaf18deda24 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27fbdb09bff49ae7f56b61196562767a37f1aa4251ae57f8dcae41c4e79d3ac0 +size 74476 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f19a44aabeb810815f8ca85016df9f35129ac2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a842d6aa11de73a2b172cbc76c14a37534a9a3aa383041e282295bfe3d5a821f +size 34985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88afabf00cc1291557df6c1817a4cd1c7597ff33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75596a0d6286bb04839535d7db4f5b4be244e11bd5743bb25bbac15fbc7d5af +size 39090 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cccce2b02c06f74477036ad36774ca9c2012186 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c08c422708ab4aeeac3227c634ebe3a2ad23d3ab862aa4fde459cfe5732d61b +size 83359 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_306/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d60a56c1ee79b0308c593984aa684b777abeec8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c09bef75669f3633ede4bb32c3b389cd1b97da6bbde56667ce87cfa3cb6ed8 +size 20998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fef663481355837dc5d4b076368554b62a9da94 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9df44a4e801caee50e433e2911a7305829976f1515ab158d623f9d4f8565c5 +size 16773 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7873cd95db9d361103077f857a24847989ad589 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:012fae58688c61c75e5fd92bcd3e4213fd169fa179dd9bcad2ae75fd08797bf5 +size 74681 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a8bff9b219cfff17fc6b21fdff93ac67dae8c76 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6546dcbb092cc9a089d5be0ef1d684394a599f822940074fee742c13c411089f +size 26560 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92d296c12012fa70ac26b544fa1ef3798df9deb0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a14a8aee84256f111707cba05ad56be7f7e280b8db6ae3f22fb0415c62b7ab6 +size 44753 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26f65fc710a1d6719d2f4c0f1c75ed9e6fcb61c2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c07441f8b543b6e61d84619783e2e893a3ec7f5bbe4f2860fcbd262adb7051d +size 40592 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3c81b6beeb9562fb9abde8dd377d773bdebed05 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1dd3bd4c0d7b7ee60cda39436105c363093491261f7c14b9688e870e6d58b4b +size 64362 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12771676031d98c43440a27dc20fdbb72ce2db6b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788aa1c1ae31c3c9855c5d61079ccc6d6ff4adc4dc79fab3f017e03985d858b8 +size 80114 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62ea462385918bafdb671fa65318ebbf73d96e7d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf51a70a67f6c198a61b236f88dcf8ce1844b4ba7be68bbcd6779cd18a0c952 +size 36293 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90b1aacece4d69cb01fb6e65612d32977581b76d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f08e7e571fb756a2cb61c3e425d59c00609cae99a615ef46c0067ccd0c7040 +size 47548 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ca87641266c43d9abd36b333016094cefeba908 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fee13d9ed4606c20cee5253ded033caa7e518abc63f70f14bde2aa7bb52ba3 +size 64563 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c58cef149a342aaf83fda692159ed8d1ed43ac4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d2d483649a994e6ee75a69e49fb108fa1173411767523d60fa17b9c4d935d3 +size 33286 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..396c5a17fbb0572c01344f1f6831ae9d74ac4379 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b386452c5712c9920ee7502be6ce107f9d2137a70442b7c0cc47f1053d4a154 +size 34826 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fcac35a4ac1817ed829679f21b89b9d0ba126e0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac154d3960979637d2d03ffe88b5b5babe9f7372e444517a59c9edc59d9289ba +size 33457 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cdbea22163cb45f0f5ff6c097c213c3e1e04906 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42678fe07b50ccb8b1a9b6bbb440c142a30badce6a55efea1e46fdf6f0f18994 +size 18872 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88807d6142f0aa0bd2ea274e0c28051643264cc7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc84089ff1a8ef5bbcccf4b5cd0629b426a04e4cace48a6a779100838777ea60 +size 20808 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a2bf00f2d7f71e0673bee119d2e4504f9b56eab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f6b2e8c0c586f2dffc8078191f03891eb5cd3ddf389e2f078c882dc51a5648 +size 13531 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33c119ce2edb88c0059cb04782d2911bc82c37e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251453bf731c58810984afb9ec0f1116ae7f22104d8f75671e231d8f4bda8f11 +size 33097 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84152cb05241814537aaa7d4104dd73b1d8df4e0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ccc4f4abb241be6e6570e8aaa2dc512a9d59d92794e830753787506f24c5cd +size 42080 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01dc2c599ed40737f8d4d2632198c9e363c76944 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ca3859844a23dec13a3094416b766cbb73a6c2195b77ed46eaa6c3dfd98a65e +size 61106 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20c215c223488cb635aff2eed173858d72fd3df9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea62ed2c5f1a567e6d9c3ce4a754c80861c2953248e67b0962111440c2a05ca +size 26985 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..435cff41864fb5aa781f7b4c8f9da37eb1c3b4a7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b02738383ecc602714c333d259c3a3bdb95becc9dd11610e1d2d4eb6fac6db +size 14762 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78e8e1935884da8635a0c1198067a5e75ca5b1fe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa3b2d51e9c35b3bcb280493398cdb159c9cac3534c4d5e6cd3df48b2d4dce2 +size 45106 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78d1657715016f5cc33d81aaaafb092dfccccf97 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b863b7142ffaa7681b0ecaec2ff67bbb34e73be528b0bcc269ee2b9a057606 +size 54249 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc6a33f6a966829028426e9dbeefddd7a819db9d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e2f651763da3e49a9381843ff31d177507a1bfbae559be8e584dbc11b54f4e7 +size 74310 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40b8ce0c2b181880049e3a5b261b2d878a4f64a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35aec48374213468f70c890c2c1cfbb36059bc5baf4547d2196a6b5058c99691 +size 35013 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab2d830344e843f451233f1ce57a9ff48500e7d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f19891059c902dbed1962002df5e3f758a56ab9e6dc4336d458bb8f01785f088 +size 39063 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dee22443312b17d3594e5145ae75e4468787ee6f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa0f672181130685a9bddc744339df6887c63cdd5e549933b64fcd5ade4e5104 +size 88128 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_309/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b7cd1f16f38889c0608fd935c25d2e972ea825f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6b533798acfeb3d1238d18faf2ab493f98a3f58cad62ccba407a99abad6866b +size 20991 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f066b43a16819bf5245bd9e9c80ca766451689db --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eace69f924a3ae45939e5c61ee8c420401310b9ee336f6661916d9f74b0cb282 +size 16545 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56aca83ecef98986ddbab805b233de9057374bee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc802a7cb5382741f9581af0e140565c7eca91f4796bd3b3a04ded2c37f17fba +size 74658 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9d7c8dff2f201e6128de91b9b30e77ce02c7b6a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:355fa114aa0816c280cc45dced6e38894ccde9dae6951512e5e1d1090347f668 +size 26533 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9e52e31a5acd2a4ec549a975cfc912e75047032 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ca5394733ce7d62d6f8d95b7d2c8e62fb7728742c081b2c7e3d23612ebf91f +size 44518 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7d647cb83680741eabdf9050df18c59efd145d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9c0b8fa9a231907080d0efa2fe57b3f438901f5b5d2dec30e26013968b70f8 +size 44790 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca7c8666ff7a5ffdea8a58156e2b37af466381fc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:686b5544cad3785bfa7a7b2bdf6a594251dd6bda82117387ee76628c34ec3518 +size 63914 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..283ba6b97eedb7ab889c20ecaab744705df80e8d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529aaf96e9b1ced162eb531b30bcc37b239c8ff4c96deecc1a33ab1f301c9e31 +size 70580 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..048d317a62b742d108da2da8c37a0a89c8f8e1b9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cae2aa9ef070cf44715fb71a063d152a2764f4d4eae96312415eb6ecfa10f47 +size 36427 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b199f9079d55f7395ee3a8c953a840892a6092dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baee5c286b5e4d6a70fc7eaa70182b2747706427f7aee4746e5399b65193eba7 +size 47692 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3ff2ef0ed267c2f21bd6912e391702f25d47a6e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df05698618469f68866f62067b0ff2b4d45758255970256cca054a073b4ddec4 +size 63961 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4033d3a5f605a622e2ed728d37cf524e0513d9d1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41ddc59202801d1233f0abf050b291fcb51db1682cc642e27534b6b6f5c357a +size 33036 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51995de26552c280a1ace23a5574d33da0fc3a58 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752775662cc5b00f19f0c8c248583341dc458315a232e4e702f39955b38b2444 +size 34940 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f33f644d62ca287b8972eef4d480fd9b621f87d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e2f6765aecfb1989d213f1976964d669d02b6b0f2773cb0cc59908ab98ff74 +size 34008 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f53a8047a600f9d6e596a45d829453dd23abb036 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9922008afe0e82e326c9489ede398ed6897cfd59b651554f3716aceabe72cab2 +size 18890 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e35dac767e20ef88862fd6e4104422c101713ef9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c72c4c510a2caedd40d58c26d07f98b0069c6061fccc534e960589423bb7770 +size 20819 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c1efd171e90ea5f306988d9dad364381e2428ef --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5ce5f8f7ed5de8cc2cd153cb05c79971e9e62ba497a78429b2b0ab2c66444f +size 13450 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..750a37c2aed9f4c4bcc1e2e14db52e18b46d7b46 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95aa1e7fb56e7f8aad1722e1dbbdf942f79c5ca7cb5008a1a8c5289822a13b76 +size 31604 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c038b56e7a22bce7a4624831be8549cd49e5e86a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0792ef5578d9dc0b0fbee1f3c2bc7058c263fe3a9ae8ce304309b67575322d +size 43432 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7288f08d7eae899fd5e43f5ac92e3dee479a50ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375780c2feb8c26c3abf807beb89a77021105e24708cfc806278a100a0f5fee7 +size 60451 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a28db83890ea72577e367e093b26bceb98af440 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6811ea29a92c026555a5aa2e0f0e959ac9e85e913290072c0a5bbc86a1b21b40 +size 26932 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af735aabcf61445a14b79f8220147b3278c8b859 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d6a5bb28271abee00ee56944adf6a5776ab52f5f891c28dfa4006eef1578e9 +size 14526 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ddfb7f50c0fdbae4cc93e5170906da5dee4ce04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91006b9b2242784e2c914cee1ce026f659160f6d06d74be8dfb66341b6d99ff6 +size 45180 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d952fae5c6a9cebdcd98d931c8db21a32e2f283d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82fa7190177b89db7a3d93f7ed6690735d46d1a15448d22eca92aabb74256d1 +size 54320 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a78ddf14d7b76ee207851f77aa4d53c61cef075 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:441deb3d592d85fb1da9dd6cd5acbb1f5241a32e75a848f2087a2d2c86536d59 +size 74287 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2dd706fbeba165792b971e60e3d54a2cf3f16d04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46970d62fdf1763105aec54ca584a102470869e99213512934b5a03be1ef469 +size 35023 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa3aaf618207182489b0b4a8849bc961d25cd76e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25eba41d5a3326c1ff4e220d63b876320a8504298dd6fa344257f5ea6fe5a5fd +size 39073 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbb144ae49498ca759d10cc925c40f20383d2740 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add24a84c2232aeecb996f64a95918196fbdef5de39b354e560dba5ba6d9a722 +size 88621 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_312/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2282e8a3a5af31315287c44fb27e780a6257fb19 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97315106b105a200406a8ed77a2f89fdc5ac86a16afaff20f92e5cb97305c7c2 +size 20994 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8954e4c721702a8813747f3abf396c2a5c71a178 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ea995e6a67869809b1113ce9a567fb004089b5b775a3ae48c5467866b153e2 +size 16871 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f65424190e3f047703b6460c910e0ea4435b27e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115c0e939da56d6a2c74f0928e2467fef9a982806a1e3e2e69b8a05069183e20 +size 74347 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f68fcc2eafbd673f7a762e969941b79a774c493 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635dfced18da7ab20ccab57d54e2383c5413f5df27d4f6fa239daef2847ddb64 +size 26481 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..471d558332d0452699c7fc30489c9deaac6775b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc9f99b01c4ac27ca920750a21ce880edba120e38b3ac8a91a8c840df31cad4 +size 44807 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a2c50982ba520a2d41552bcebaf5d1a46b8ffde --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd39c3cb561b1e5faa768c37c642967bc52f912a76c03a5d8572ea0d77183767 +size 43672 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb7aa0688c6fe159eb0c326ad26539d34225f6bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43230a38c1346628f0f3dc341f13b7acec56b07c42f23c5dbd14ecbabda320c5 +size 64837 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d7869247f8664b8f8d52578e171640c54bd5fc6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef922ee0ac3dcdb90b4927e5c975c1708a97c352951c917676e41543ca08a612 +size 77784 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c32720fa2c1b81d367363da25a828a632cf3f74c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1255ba3b968dfc62ac0d37033f0334e035dd7b55754dcb9aef27c2d4e8bf2b8c +size 36464 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9efb3e1ce2b02d490d8e2beb9b7c00c03684bd64 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0bd99b3d7438e13fdacdbf00a511869cf62c4ccebdf4bb438a426cf21947922 +size 47622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b9a57900934b2d06011f56013710e9e47251dcc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43c15967e59c63ad217a4dde90560c00f78c3d0d4c238fbef9e95c62502ed8cc +size 64225 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d51ca5f6fe4ecfacbf3cb64bb669ad2bc22a89c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379ee634eabce29a3e9b2c90f98975a1176d323d56304e150357ec3d8603cdfe +size 32915 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14698bbb17ca274a37e756e2057c4278f822c9e2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467c9d4739be9eb126cf3451b67c5d5534903effdd4a6e617359e9b1566e405f +size 34991 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..415571e9f382ba21832be273da85072bf6d28cd6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6419c56ca7685b0c8a2a298b0b066250444914994be689863ed99a8e3175ab1 +size 34110 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b81edc879b713b2b2b09bb56ae298d130cf7517 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f7968956f05d95a405fbc6364074e1865a728631101a8a447f7034c63f3e6c +size 18883 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed4e554e3cc318e2d5922d6663a755c7f95c3161 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40cceee20e83b6547373b489c3220a3f5f67b29ae4c92f60e876a8fadda00858 +size 20794 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2ff51e575053c528c7e039f90f3537486f2c969 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1679e38ff3335e02cdc68e8db60ad476dbf2a1492cbafff8410ed01a1965e8 +size 13866 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24f5dd4be9faed8bc1a23eda66995ca580364296 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3bcd1f31f36f9923ce4942a51a38982383421bc76af7341514d96f91f47924 +size 32687 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afe46c906aa67abdf7623e4cbe5802ca6f519e06 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03996fbb78609785a66303b565d2281613933d223258dcc5715990ad2a1c1669 +size 43303 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ede1f6af057658f4fac3dd630e85887dd969186d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df0b931b340a54b4e183a25286a63e391bc4c54772eae2585cee7bb60cf4b03 +size 60630 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65755658d15b77b7ad70a25cd394ea01935d781e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85c6787d9bf6daac97bddb2881beebc5b331f74be4ec575f7e07390d4e17455 +size 27789 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..609ba160498af6b1ac23972537f345145aafb87b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2af18819f52fbb3af20b0d0e07d514c8068031a11e0a6adffe874db9b4e6651 +size 14704 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40a79f86b60bbae73846a23aa88e53879da311a2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2697487b840c9c9acc72ea0b1ded5902043757f5bf0dfe083f165c2c0b41d260 +size 45145 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9429b8d6432b2e4ec49d05dc42fcaa788b4e0db --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5406969c7b06572eed4c20f06a15a73764ff3a486193f981ef75d8d5a1a91b92 +size 54254 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..073c403e6abb43e1f148c2913140df815261a83b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f3a525eb7b9b1efecd9c0aab772de92fa57be86cf50e5a455c6fc887f88187 +size 74471 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6698b1d4d733b55a7dac1f2a1d1c4654a5b5515 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7e86f42f4ab2c7f30dd7e2a84f100b4e4149cd818ff30ca32b3419848a0c48 +size 34850 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5b83797a67f8332ed7bd3a3ea117275183ca2e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0dcbd45c34edde6ae921812c57ff2d22a3f9a166a92d8eb9b4aade68bbe6cb2 +size 39090 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e5e28e206960d2da8b4f61612544f3d77befbba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f1c3f8becf7363af3b4339179c1682be82c700c70accf42f0050c6e9578c6a +size 89322 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_315/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb9f5f9844790234f265805b306dcf333faef08f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfe8343ddff5037dfd640308d20f1894ab4288b6e5e19572521b6e8291da4d79 +size 20968 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bad4894d200e980ff78f4804c6ae8eab080c823 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dfa45854bc895a77551ccbf72cd8a9efd56838be269ad43fdd0878ec1632b92 +size 16544 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..667c09413dbaa60a961ffa60e941e8a6706c3641 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7dd97115d2a4c3be4b1b3c0a3d0ecb2df7c946e5b3c6d7070833e0cfae0c54 +size 74638 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a1e4bcc69bebcba410a3468d5fe71a1411bae26 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df147a54e8648c0843693ec581f3cda68030a950829d68d80ec9751f815ee44 +size 26622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fd5862dc22cd129f0b7ac07d3864b2fcbcdf830 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e08c8b8b8d1135179c71c391ba12f172e4eaaf94572a139336c3906cab20a0 +size 44442 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8322c6796e137ce9c4a39a0e0afa78c4fbaf1165 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1252bc099a125de0f61621449b52e4466b3615d265b2a9423d78367562d4e7ce +size 44929 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7dac9d1b29a8b3aababc32a27ba8bcf95a585fde --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cde51c008220f8a16c2e140b17f98034861fc839ce76a5ea309953fc32b03f0 +size 65509 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad08fbdb40278ef45f432fd36b373da0bbf80dd1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b3af28711f22f6cae1059dbb36305db85c7e949422ecc378575fa24766a55e5 +size 87301 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0b257762c7b79227fb7569507d02360752bbafa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ca53051c98c6bddcbf695e7755f8d28d23846c563fa37c9a5b992a2c681231d +size 36645 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34ce95f5f32c16f7acf4e0703ce70e07103c4e10 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d499ab98435c4c939b9f3e6bf26b532b40f32b97413f83e1c553f4df21e3e508 +size 47567 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47ad0966a7001799c4e61eaf49e2148176d5c625 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d3eaa546aecccf0efcef44b0737d5547157786e04703ec5c40cb27ab525c96a +size 64821 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25e51664e86a1aae6d9868cc3f9fe39bf6838361 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:756bf7e7dcd0ea82cfbe688adee527afa0f623916a7a37e95ca8abf3d3aeab3b +size 32947 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f780e698c20377e7d539274b3710662e4b39777f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fae97c02b07a1e42d2d6a407da49c9f55c6b3afd57a37930a810855186c2249 +size 34949 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad29da40cd8b45d97169a16d5b2acb5524e0470c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b92607d8c66dfc66509734cab16d6d05a9bdc9790c135c7cf60cb66238dd99 +size 32172 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80cc1f79671006803c49ef4a86c46b0b493a9c50 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:360bb2fe41c5e44cffb9676b191c1c3588ea9de846d8fe5f6edfa27af14c55b8 +size 19026 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0572f6faeb6395cbc406752ac5d0ed1abef4d1a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:337a75554ec2fd9a2ab162dc62dae7479f50b99e40a83c665c4529646e2cd934 +size 20707 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02eda9fdfb3b4d7865df15a055b7fd07d88067ae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb054baa5d20a2fb020bcb160ae6edfbecadae691fcfbda79c9ca96ad11ac3b +size 13622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05e8f48200a894df317a2a5ccc248509f3ea72a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898bebd95f9338ba5eab36c88404728287fac6dfaee4c2aa748a0f3a7b081921 +size 32019 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9db93e7a5c5283ec16529ea454699f10bfee72bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb7c358e39f43d77a8a15e122b66f211e0f1525677181d5de4990a86a6f5076 +size 42842 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35b85dd9a086564b3d5984dea8a1c5344a3c846c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba1f05e149b57b7fdd1e44920f9de3fbacd309d2be99a4e66d4d735278898682 +size 61349 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7135230e0f43ab8250a545eb9a35428d1730d63e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ce8595d1dae6ef02bb0d1c7e833277780b8f82fdcfca584709c9bfe3c998951 +size 27055 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcf3040a544cbe1a336389c7be54e7f2c2d7cd8e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e08ec3e6bd92d6a752e1672a0e55d4ba062ebe81ad5636e4f0c81682a5a92a5 +size 14600 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43468282617bbae500e022f63fab2406b01f4ec5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f819ad65e3a2445044d2be1331f16dab728bde0cd3742ce78a160f822e5411f4 +size 45146 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbece24c7f9fd326c81bbff8f25e46c7c6168539 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06844b54196d51b0390db852794266a99be9a36ab9d873d31dd859cbd63f654 +size 54213 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f615175452b0c6e45f05ec5970b183f97607bdb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5849d253556d05126cf812f90a6ad40f864f3fcba7dc3d322926e7987a7cb47 +size 74351 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c49cdd038f36bc0dc4cde716dbea37793832c6c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed7170504d61cba6da96f72c0cadcbcaa66191323c82a6729b091136c1577b36 +size 35008 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0a5c895058fef91afabead84c93e943822307d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:097cc48aed9ce86a1d197cc57a5fba268ba0a29f56ff3fd942cc4c122e52d92f +size 39059 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3e248589699db720dee643d0b898f9177f4406f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60366cf934ce457955bba89867563758d547a71a16da0a3f02f589471cd57b9f +size 89862 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_318/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7eea643f348ea870e85e1eec01ce941b4608e386 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb4d16f5b3919ef03493d7e87e50ca43933566cd8ff79505d9223c8b7da9f7d8 +size 20992 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a43e7d1deb8823dfe353583e6fb91783023e4afa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84fb1eceba3a5a62787ae8a727a260a602f7b43935d02fdd9d7b396fa4359076 +size 15951 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdd5f591fa33c056d0c54e019caf466067858f2e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad95de2e84fa36f49d0bd44fee4bc39bf8e4afc47ac04cba9808c15200e3257f +size 74588 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84d974d200d733cbe84968b7baf11f343866de0c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31eae4882ccafbd2377da74ee521a8d00e36ba2398ba752c19060478a07c4f5d +size 26594 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9dcb0209a5f8b9f8d3919dd2350e55e06808b541 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:465c869b6df7419983ca373fafc8fa024ab7bd77484da2c5d01ed5e3930d58c7 +size 44151 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b50102676d5572976f18a4799b5b2853ca92441 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad14077df0de8101d6ec5c495d32abf091b414dd674a4f14a16a3f212b677906 +size 43112 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a069e7c63bb98d2bfa2720b9d6b646e9f627aa45 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c421b0f51b9a2650e53f946e255798df1b6081b42890786a0d713a9773d47ed1 +size 64343 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21bdcc6d99795178153b2d82bd0a3a129bf22e66 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd9b3cae69b96eaabe9342696ea4c281c6a64b443bc798cd6522a5057833790 +size 67614 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d2d88dea76ed278cd02d4734dc555d4480dec61 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c860a669ec0e8f647e46751394e05376e0548ef1248d04a570c63809b1768ae3 +size 36782 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43c9dbbc4828ca498e5084d8bed999cb4f676627 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5fd1cf7aa9762768b28d0a8fef5ec96e3fd37df6587e5e3cdc45b0399bded7 +size 47421 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96b0aa56ceb54ae01d0641481ddcc08c4cdcf611 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f843fc17315ce775ddfd926b35ffbdbfe78a38139fdac4e374a76c17c1e7adcd +size 65040 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4cc82947134c9ff16884b3d2a9fdc1f18e013bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c6a80348835e04f842a8be619436ce54dfe9b08f85813e75e4914a95bd8ea06 +size 33266 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..018fa8009a3f9e1f40ec91068897fc4b2c952dac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f4f7011a99cb918a7e1df4f296eeb8163b441e36a5f11630469cbb927f6a8d4 +size 34465 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d356ae8362e4e16c354ac58f7a637d3dfc854026 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a992e750fb47920a3c962fb229a188efbe8654bff6a7b20b94e187875644d66 +size 33742 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66764c61f2ad1fdb674edab5eac3e4fcb36e9caa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:281fa9980f2ffde8dc44fc0df762574c914cf78ee2afc67c8398348f042e45b5 +size 18975 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96c7bbb2857e86649c82f9bf7b531095cfb90693 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3b1c7c90697608847e683cb525c95f6d078637dfcf6af494d8a073c78cef98 +size 20744 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..799e25f52521b6cba01d49928a1b9cf3bb695052 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e49e302a44223953a6d6161806204a96323a929d530a6895e7f3036286fe637 +size 13626 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f59b9f3d061236808e23b27c336d01a4902d4ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6de80d0d00ef836fab1c6665121062c0e2748021c587cf27ceedad736d43e2bc +size 32304 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40a1c660d6c8c16cdfc8cdc2b6b27e4220d19936 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4276dc1ea1527d3428693515ffe7e2fcd86f692c668093d1a3aebf4a551a12 +size 43376 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d31f8a3a71be747eea45784808b1eec6bdf75380 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2117eef3dbef7a4dfd107f4d8b56900ac926bada838b94dd3e42c57457f63b88 +size 60689 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e5215140382187be1ff2f3fcf8075ff59083dac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b581455d12ab03e3a372960f63a6da64891dd40df3088736ceb0e05e98ace8 +size 27150 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..888f4488760dc7424d30e186a4331977306d1d7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e6b8917b30018904a592d080790d2c865da2f78b112ece542f0b6ade18cafb +size 14680 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..102cb0ceb6267588fb4edcd213d8d08d4402c997 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa6e617012623a477a09b71eab0c5bf9d489fb1ad22e8b553b31d5b70190f07 +size 45142 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0c5312ad0ffeb91555a011df929140fe0b42e0a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a0633b657750ffef175558513808147491c4b137687183e16fbac7070ccc37 +size 54207 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e98e967f65c06cca7b8db974761d8162be4e3ae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095678261d164c2ae2eac1906f63ffecdad15c417f3407dd3cf3f72326514336 +size 74398 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..953ff44664e5bcc734e1e549fc5aea85a35fd8e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476c7d16712e189b60c0d7bb9f67d07a7c580b26c5cccf1da7e6d81c0c5496c2 +size 35057 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8191e25306a48329f3c6e91489ad3d7685b204b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46e4dbfc97a1daa0e3f2048d2ccc5ae457478610b014033d5966fcae1d1e03d +size 39062 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..944092a1a1d24032a776c4881a904fc14e5dedbd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7742650388e5ec62c6ee862ea14ee54ee72b07485f7b7dcaf9f2ad0511aa46c5 +size 85227 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_321/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b679fcd59fb844e524943472e2172a7750171f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4beadd6aea0059b8c735cf818d70b90a5848754d2fc403c422ecf7f35429e866 +size 20956 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cdfeecbed8a234281b8f0e04f6b3e269f8906d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7521e65d25e77cbd76bdfff109c26a6364bb4aea49d3e2b0fbe1210766a03153 +size 16133 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53df47f9e6c69831082662e1fa8d7d15c6d262f6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac2a764dd8ce7641176c9fdbe06cfb9849df93614ff227cc5559e1532911703b +size 74782 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6450de39829c0df1d1afe15f7ce8a1842d4d7b5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d07addef2dad6bc7dcd5b1060effe4a1d5d8a9f984b6d2f6279ee23a50c8a6 +size 26582 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6b82bce916ec6cd3279141682fb686312e12563 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d0a800f979fcc9d816bcdbf86516e48dd9b15f08bba25950a96d2ec23a97e41 +size 44141 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3a217b6ecae04c7a51a3bfdb7b685f8bb946046 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611f9ee522458ef203e5b5f27d0b71f0db48d7ec0bd89742c66883a8993c2a40 +size 43097 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ce27b4b747513e47d78bfe519a2415d99b08fd5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:167827a2bf377c032cd5ad6283e53acd6bb6503b4f68afb2e5363c7a022c1437 +size 63952 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80cc18c74f9e4308b0c743dd0a74a45931e0700e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d42d0f3a39bc361e039cc0f05d4f3033085232315cae3e080108168d072e84a +size 79433 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d42d93e47830f907a57bd7b7bbec805b46d6a21c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f99d39d07ead81d840374caab13d812989d1979ed3b6fa6b1057eb0e30da13ba +size 36688 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15cb7ca48aff797e5f1aa4eded77ea4640e45d73 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0fbe279b43b54ae62b05528737bc6dc2466eae6578af5654f301b009547f15 +size 47924 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08175ab85dc60a89fd069c3d50a6b86c141e7ca8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee470e6de910e815e770d50033e1990ede48a0d04feb22c1c6106e48c837f470 +size 65599 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..296df1b09df8607cefe352cf4d0e3b0a9385e23f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a6ef3d5ab1c216963aa14b20ab551753ee89f8ac77ff5729ac7c00f01a3806 +size 33404 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c41a8c6819855a9dfb9bb1139c608db77534b196 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1522f222624ff9531989e9625ad142981ffc4af804bb38118ae7d17ebc869130 +size 34603 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc30f1240ea32524512af03622b2da3e52d630d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9a8b5c93f54968320b41c0a88fb3fe8c57cf3aaf96e1b5d8c154bd338f8a98 +size 33140 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e29f8d0a849a0ea7fdd9dc09bf50f3a7439b86a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b257fa7e456800abacc65b3495e96d115526c0cdcd5168270a398e0411c70c3 +size 19038 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0bdc3b72ac785ebe9e745567ebd785ea37c3dff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5067724d1efa5bcb1b64df29c0356e989ecf4266159fb7e57b65bd77667b5339 +size 20707 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7886a57bffb0a90dabbc2c03a37e0cb5c100226f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:299f9c7cf0c388c6261be83971c7432b816285bc8e73f89889accfe733c7550e +size 13504 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae0cb442aaa35b27d2216db8d6b9169448d4c621 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0f302ee72e3f203c87a883b6b871b1eca9536e813fce59e53f81e9e994efd7 +size 32086 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea0d3b253a9257d0cc88aedd4f6946fb55d4365e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f37a49d9618e465e6eab8734ba935ba9e6b63d849b74ee420d405dce08bdf197 +size 42710 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..621e6c355a9354830c8a7c509818c6f46e659ea9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff554a92173da525c75c017c4dd682a65529e1aa6f24123d34b3ac4387978388 +size 61077 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8be886b987b8e7599c9bc09032d6050129920beb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:904f673b4d974c82defb5c6069d0b0697350c74b219ebd2e7012e29baa67da0f +size 27389 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e36115c2fb3fa853b07b7020a13fa1f98cb198b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56b7caef32fe8444434724133a43a842c0afc74f4a98ebb92f754acb2290c688 +size 14663 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8858f77f2ba7b84406ce5d9a154dea988c97b923 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04eb86d80a00a27f3e58b579cb4b8ab4635ac7b997532d49c50bc2f6cfe1fb45 +size 45153 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2189e6cc5add59eaa58cf1d775bb236eeb1dec0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6ecc3add4ec7afd04a59644eb7f075e122bb414245297499d5cf7230e187855 +size 54196 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12d3d9d802039c398af10dbc3bf4217894654060 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6925846dd63db2448827a1cfbab7fd89cb2eef826cdd8b6b5855bd0ace83930b +size 74407 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca8118f4bc9fe002e01a3f782e6db101738609b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628910e86841fb15b3d4d618a770dd6471b6cc1bccac24987b0c931fab3085ba +size 35011 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01ed24725823ea30c9833f0a1a250e28086961c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f55145f36e7a88f47d6977f6690b204c57be9713cd7544a467f6736ee154f1c +size 39061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..410141959df4618806acf9de5b83a73205400c5d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc88a6453b3bd5489665fee2f7f06113d1332722c1237c2a5183b2d90ecb5272 +size 88369 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_324/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b56508cb535070ba41a32d16b1e2110f1070c2b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1188ab2cd6a5116b5592a6d88194825ee2e0d273ea64e4737e42463f0b81bb8c +size 20987 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5e9f0abd345797d47041006c3f33d0559a08d25 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d0291d54695e17f43a0032af640d6c669d56613017df61ca4c3be46820b390 +size 16178 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f52a2337062374952845a37c2da78e9302cdee1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d32df3b6298977259eefca107c3e34f1bced194e7490b1e747d9f139464739 +size 75070 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..485313ed88944e663725e085f57f197d0b8a57eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad2f70cd6c8a3e42c851539c408e3e6ce6fcf7af550f4f84afce52535bdbbec +size 26618 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cce4015c7a135fe54a2a7c67a75da0b4253c8412 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e6ff44b283f8275d2132d0d5f621272af08f7b2d01c508bad56d434357fd45 +size 43791 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec79f142fef1a98feae67fbb1120eec947f177a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6861f2d1fa6418f8b78368ba0644620b0e21e74d803f88e5322082b9c19a7ad +size 46026 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..142487b610196a231baae2a26628a457c238908a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a6aa7e7961a4ff613c402bbd9e492317d30b1d003ed6570fcbd0dfb4025bb18 +size 64165 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b602a72072b0178b0cdccdb86201ebfb8ad337bf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38e24c571bc467cab8d4e07d533a3c15789e4cc2b3a0ef5e85b281fc852d945 +size 75410 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd480e22472b30947ac6574d42ac58deb69477ea --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80bd549928559e425867756cffc695d16f924b77f35fe0118ccc6d11bbdffff +size 36472 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11aabedc1a71b209108c69446a6ae43cda81964b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d57a9e01ebe36372356649fe6431425e14d113ce2a0dcfbb8032f18bee8cad +size 47482 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afb5186388ef6f888f343be0ba0d10737fc60a04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f1e34253a96bb4e443f93b2779468f58846487a3648e104caad1c0e3bc455f +size 63746 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c355e4aba1fe3ffafd76376acaa5c51e4eba5595 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01266bed67a3fee387ad251b10673330e6d38475ba72c33a42840dbed113e2e9 +size 33148 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f1b3da2d8a0e4d354c838aaab37ccd101674e21 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e48d0e8c09bac008616484aa506213f486307d1cc228f2b3475207e0cd3f43 +size 34573 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad93a8b5bca27831960483b8042999bdb38386be --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d8b172edd435ae50d77cf12919d0526b9a132538e568895fa33df083a181c1 +size 34150 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40a272c017a8dbfb0c37fd65ae81c19dec80a050 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f06755feb35e35226424b0670ea29bd0455ae6a7ee4ad0eb5b96d033b6f0c8 +size 19047 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a921dade8160e00bb85229ba3d153798ec69fa97 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c0d2ca9366f1c8fd8f897bf0b35e34a933b7710408666b561a617eb9b43f76 +size 20707 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..841c7cbd802616727a814067e4d06d184a8fdfb7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e983817587e7836774a335c2df393acf226b803c14583366d2f75b8eb9a2a7 +size 13378 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a0ecb7658f4938cc5f9c0849657582564914f0e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ab99264bf1ae738f94ea3deeee169a950f2959b8ac3aaa733a18568389a89d +size 31752 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a77d4944e5a463343e6dd74edb479771d1d5f5f3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59c23319960292efcd8edd4f23cd8ddfdfdaaca13ef5c57a9fe7f2c713e54464 +size 42799 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b59cc1f0eb8e31f66b40ffd76d6719cb994778c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e09b0db56b78d88ea4cef6ece6da9806994abff1209f33e2185427727676c21 +size 61015 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40dd7a636a3e256edde502f2c8ca63d0d6474ae1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc57edfa089c1a98f5f84a77b3dc5029d94ef469ca4bcf0f73990d0f32443f3 +size 27756 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bec94d6c72546e1023b1a6745bcf890914f82ed4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35105f2ec2e53d5a65f775d1d30026d3ecf34fe94fb674a28124d6cdfacddce1 +size 14571 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abec6bcc83e173b6679ca3fc8ce73aee3cd111a9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b947285b2d8cbdb925e9a43545ea64b342fa281ccc3f4186f1b99248e34414d6 +size 45191 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e04111c770a52513a50874c05c820b28d2cd26d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ec16e17fe1c584a162f3cbb1cf8476599f8f5bc8d63760b8824c32cc05cd97 +size 54266 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5eb9fd884c1ec24afa3c42fe719a5b2cec0e7058 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7742ef1e3516c670c48d8a7e90d6da9503e87839a264e3dbf5eaff1216c362 +size 74257 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e400e0f116f132e3e762cb63c54f1087f41e863 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce8fcfd658c898e31f7327c533ab85f6bf66b6feb7aae9ff1a44316cbdfbb92 +size 35015 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3fe7568ebac82798075d807e0849a163ffaef5c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f5f75309cc24fe7bdc7153423408a18f280d19aa599c394cb1d81d72e571d6 +size 39061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a3d16914d693e67a02690a6dce0ecda5d369138 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e430d56a4ec3f661f4e430fa43bc5076c2af1c79b006c5f60d5a8030f0c0bd1 +size 88672 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_327/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51b371a1300f8de3459a32fc99e15a5232664894 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bcc278c8dcdbb035cf423823098d67ce77554a1f44c7a8e0af9172dd62941fd +size 20987 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0c931f694d17ee922cddfb82dd7065749d366e5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f2de62386407749276dac68a91f13bbfc7c03a95cc83dc87a63483bbd445a2 +size 16191 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6ee506dcedff4faa9177695f28f468ddc3eebe2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77709c42f3328fc268f9e1bf1fce174885739090373d22c02c125e96aacae271 +size 74557 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2999b7b399682cb43be535f6d3dce401b296861b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6984ab00e347b3f962d5364ecc74e47389ba132473b7ce7cb86bfce7dd8446d +size 26662 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4611aed28a295882e2c4e194f265e87f0770bd33 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6220ac397798ce5eaa22fc2c0a30b6fde1fe40ec851c3763a35bfb2071e486 +size 44066 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..581ceeb0628c6ff8747ebbb102e628917a8f1b59 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f3a4ca3bcac18635834d5c1f263d06ee75cd16febc53135fa1adc247ca5f71d +size 45448 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32080c9e7879849cb7e59ba8699b449f547f5d17 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129d0ccc5b75dda448b12a357e889d4752c5d89be2a9f22a710348fab1497380 +size 64470 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97bee5fd9c896239ca64afe4c83919afaad16302 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510719197d684bcf22c43c9ab5e298340bed7b9b9b53ac2d3f5421d6537d2f64 +size 74800 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e9cad9b609912ad938d5d8446ef8f7673c345d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de9ea35353b2706c72e7ff1d2996a3fde88ac1e4407e7f93c9c6380f418ed685 +size 36355 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a09dc623b74d1dfe6f1798e12919e62b9a5c4377 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac2ebaf40715501f7fc834fa9f34b1619b2574b9a6cbc9ffed2fb08c562f1cf2 +size 47154 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8969442c9b6336c343ede54315656261b0caa174 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7352a61d7bcae53033bdf940c17f36d98b4ae2a56bb71465dbd9209b410c0bd9 +size 63564 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1255f7a6dab896888f11ad573134c3a3b65f4525 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:205b45970ea12ff472c423be6aa99085e3c2389d788094f7acdd59248d63445e +size 33186 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e2547cc42361244d4957fb440b7162bdb11a633 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5c6b7d4c84b5c1454f635cb6d4312fba2fefbde82824698c84057b8dd57727 +size 35013 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f74aba1483c14a67964bdcc6aec357d677c57a0d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbaf34dcc659c3fbadd1477c73456d86ce190aa8cb14360be45998a377e92dca +size 33226 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..442de5ed070c6cdf8200526d874b96a7fd5149f5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d33163e3ada468942d94f3515196cc413647a8c1b19e2ad515bd2255b6c76a2 +size 19035 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..263bb1524bbd137110503cbd8062ab88e0daa485 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3acc511683c11f5c7b3af1d9a8dfb6ebdecaa35f6d44e802bdd8394a53362f +size 20725 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d1b70adf904348d2f8952f7637fca1857634c03 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7900d88c3fc7ec7a66e193943e70fb0e342463687529f81ad9214072ac32ef39 +size 13706 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44f178c0248a64b3442ba26864bbaa2e87c8a882 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd78b3cbb3609cc2f6caacc00cfd292f27e2138feffd9eb124980f5d084f14d +size 32035 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b40a3421e936b2ae8545b0fcf1b816960592d515 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b690e67a4a67ad82750052884101519208a2abe38ffdc58cbe473d7f6ee4c3d5 +size 42592 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..759bfbe18ae22cb47f68bd4e86586acb068e7e7b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e832b75941b80eb9db11f49b559f996ed6ae28733acbccca64d40b0d175cc0b +size 59580 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf10682702d0666a793f6ca71ab38632247ae191 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f5c776a4072e81e7805befdb9066978b75f8ee092262de020adc4bf9601310 +size 27667 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78a18e9fd38c4f2b7a5208caa683d4735cd61f8e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c83211b66c04b57265df6ee953333beb23bd9a7b3500b9dcd4bb4228493650 +size 14595 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..034803da5af5ad5b49b12ddc362b424bcb0d0b71 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b60594bced5d39699b422966ae9d2f72ba48b93656c23863f9c89bfd3eb1866 +size 45061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02bb37824c8a071a7dc3ab4d567cd9a484b5a7bf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a9bf4da2c22bba7a5cc7fe75085ff6bbe0f6cf0e91d67f795c366e5691b3a5 +size 54220 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f779ee7167fb536a6ebbcfa40ad80b8971a0fa9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929be108be3d341fb0aa691e6a307e0887b5fe85b89ee920afec1f2835527380 +size 74414 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea8bd6a7fe3fa3d70eb412082d5d967bd142af44 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f72093bfd2ed48cf518e03da8797d8fb7a0cfa9a2b07472bad1ab3194d5bb64d +size 34970 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31e7162e95220a027a45d55618e036f58f168a81 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac98244d21f91f0c006218f33e98f77eb3edb0bacd184c1aa1787fd1a4b3ee24 +size 39062 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb3dabad9634d8b29c1e603b2089631dcd7a4338 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:686bf7e5c4fd66c86d8046dbd0d32c143632aa8b0519f2d2a2585498c35b4f7d +size 88757 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_330/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9638faaaec728367133978fdda9db9cad2631564 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb1faa3866607d6347f2569f3aef93e198dca42bcda8b38b8d2ae91ec09100f +size 20952 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2c34dae08799dd36ac321af9ec5247fb7de3131 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af40fff2c4e2aebce78c42774b1640a9a085303d571bd8cfebf3d81ede3dcee7 +size 16077 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f32f8727882cd566b9eb1d7f885cd9574ae26920 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b794b7c5c027afc494e0c2156b304f5fa3b49c3524ac73fb2c5056cea22fc6f1 +size 74804 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3811ecbc15796ef189d200167420a2c6fb876477 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027d54f96d27644a94f735ba425b9ab855b4d8a4cf3bdc81160fa873186c12a3 +size 26376 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2b52a6e2c0a8304186160e41fd957e073b5c4d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f82c476ce9a1725436ce6f379cb0332947de0eb28700c39494d99758a9c5d8a +size 44651 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..424ae342180871aa6b1da22f2839c65d740ce51f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cbaf85f7403ae5910639343838df98faef26c48aaec39690c4217634c415ad4 +size 44288 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1ec6084771ad28fdd940399b9332a34d65a1616 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c076bb0c17576df6008ead692902072664226b183d6b95e55c3556d8d2946607 +size 63910 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..342604ec59568011aa5c3d6c10e2a2fbd14f4929 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4803811a555c16eadfcd682ee30af491f49510517212bdc5ddd7fbd51b7a76f4 +size 68231 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94512f6e68bcb1d4050a62d2a4f65881cc571a9d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49661ebea5a0b7ba61abbefdf420c17ad249b1fd975741b4274a792fda8d4463 +size 36372 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e751cae003d27942a02c4c3abf4c14add6b4c923 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0733eaae93baaceaf54e8ba9dca185687034924a69eea125648b5ac9354e7d10 +size 48198 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..377e0ff95175c28166c9ca1763a4072e0b8da250 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd66e5d5c916cfda80a73036d1b6a2b3e3f9b3c3e294a8098723cc1cee87945 +size 64557 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5467e1cc8d1b40ace870e43b07187a19f159b38 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9bc58cddbdba53a65795d5650c5034dbaa174a06e540b1eccf7c36ca6c84792 +size 33405 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37c8619b713a5c3a58659fd4eb201acb0f70f64c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba9af26c1cb836cfd91e3ad4b209da9223432f4ab0624990973d593236fb79f +size 34319 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d61bdc06bae408d8573e66b2ba1a211952b97ed1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff8d9d8885a7411843afa41faca86926cd20b30f35a32ad8ed5c805c365f0847 +size 33283 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..052a4a2a1653bbec9073fbd94976ac942158c7b4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28b876e4df1ebfda5b7454ce48c26f2d4f8ef518b4e3041fa600ad021a4af90 +size 18974 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6ab265b066e84ac01527b3b2c33d5c52edb5207 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c1e2a9599b2db4ca7a6cf28751c385d129ae8a5d45266aaaa1e6d3861c2998 +size 20713 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffaba0c8738fed55fdb32472a8c0ff257d80897f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0313084b58df98f7b052f84ec4e14062cba62ef833229332a43bcefd6d67f311 +size 13350 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c8374069d5afc5575458d1e6cda3850b52595dc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb1446d86b43df1eff4823e322e9d7069e309170347b2d5bef2d88bf7998786 +size 32450 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f60248be307d90a0e2a9058c673f72d23808b7d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94187b11e9be8368512d26149bf82ab952511f33f30eb293e6edebcc4796d53d +size 42568 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bea651790bf5eaf085df59c26c8fc3f59bcdbee --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41adef41c20029ba63daf4df317ecbc35d240b2d66d36abd568dcf832f91d174 +size 60669 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbc1cf836a067d0ea2e494b782047564676b4d01 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506cc501231f58c69be2f53f1b57fe43d45e5d4dbc627e458d7d7ce73b785381 +size 27646 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83d485408c7911def9155ae33c8efcfa4b331563 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e980e2f489ffcb09048c4d069a3bd9a4d7cc461ffcf57846f0eff2ad3cb33bd9 +size 14622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c6b438745d20bf2499a7a7241b06902ecc05283 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd4027bc56c9b39167be22a56a48603200b70eec135ad551f4f1f659b9abd97 +size 45037 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0c133bd21b48a69fd5665e1764fc3bfc9307e2a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194e62a0218e23c32d971749932ba64de34d2a71054fb051052c2e46ee3d2f9b +size 54267 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e6353e45625de604e7a6e4b8f5fa169fc62675f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ea736e5b62c7c90f5aa05f216ce4c1552cd9b60a43d0d8b51045c83a879a15 +size 74426 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ae8fb7662912b28185320d6d05f22eb7c294e7f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d173025ff80f811fb02be0abc2d8bf33d64435489cf12b00dc3a9d9ea866b4 +size 35075 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4809477a06d056389dbf03bf0018892e89b18c3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149635e8dd137aded27ffda6c953ffd5fe3070ff21422fff83a41858801a1ad1 +size 39061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6391d5fef4749b55cbea54bcd24ea4901846ca40 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a611b5b39b5135ff1c914fd759893ca67cd9dd2299b642cbcff7f5ed9cbfe0 +size 86102 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_333/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..277f44e4937e16caccc1218d4f9cde6ca8754e49 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3620d738c94291f4f867f75456a14ca344d6c5ba9ef2e3e80b6c3bea128a6d73 +size 20977 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c25765f35d5559e84d81d6fc00e1ffd0b85a5c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d80205d27e5bb2c55f0818357efbd99c5ca16e329265cf048b16250422826d5 +size 15807 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a95461de99b71ef87907269079c1c00ac0494aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199f36b0c9b1c78422ff4953444253bc64725084ca4c7cf5fe50247158c309b5 +size 75287 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e09f49d004a729dc3e73d5273f5dd46a2cf3fd4f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcb5ade0b9992bcc0ac29a96342fe30dc35c28a9fd7c2cd8217d17c5cf1ac88c +size 26707 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd78469a3a4ff02390860ad095e63ed93058750d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7ee085a919592b35efe39378e58ae852e84bf4def9aaf70c6d395998b20ce16 +size 44620 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af8781ed0cb3f1e2fc60d51e7040e293bbcba0c8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a904eedf1b8b834c890857dead3c87145307d853bf2222658830e4548e90cee1 +size 43192 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c33785154feff7eff193d048786cc073d9f9cc23 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ab44918b2f20b49fa504782199e76c8bafc9ae489cc1f4bdedf5c1a81bfcca +size 63856 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84463f6a61a732b148ec546732d2d27fbc948466 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b623b61d23627219c85f4782a59651b869b12a13d13388c11681dd560b683a +size 70843 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e564cffd84a0eed9730c0f58245fabb4e8ba5f1f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2327dc684bac78845e1fb0d1bdca5acccdb32d57f0aa1c4e26c7cd2c66c24eff +size 36527 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8fbb194a283c240d2f35f5119ad892c1c591074 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24284c9675ca37427cce80d85c65e3d036faac23d2f3e4ce2d19f17c76957923 +size 47779 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d8d689649549f304181c7cb6a955eb640d5f5cf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d8aa835bc3d9bd22c8d442f8dfbdab908cfb0cca2e9e7aff5812bdd9c11064 +size 63695 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d50254c3777eeb104d11bf12587f85f47eb0d7a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b079bbeb2d2dca77c70fa1a07c4afb5e3881cebfeab9f435c6ce0154746d6055 +size 33315 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f3009697b24f982c7fa91e1d92d18f84b0a5be5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7db6223760898f1763cfa6f565449bd243928fd348540708923ea80ebc9fe0 +size 34934 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5dad41d6624445d3a22f8a4b7edbfc235e3b567 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd6968077d2dd156087dc343a6fc307203268ee61abfb69ed2b2a440778daab +size 33588 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..667512ac353608a51498da600fd628ae90c394d5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221e5275cadbf3fe0a230d49e7688c105188b621deab260bc6c6d0a390d887af +size 18851 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b2e665dd904871d8f2cfb16857f179daf74106a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d206ca932cdfaecb0aaa990e4e988abdcd3b470cb44bbc4ed43cf56a77049d40 +size 20722 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4c90744802fbdb62f254da26e1e39da8fbb0905 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b91df3c409428e155f5639e99dc8188fcc3a6adaa7b2bcd5b3b9f808f4b2fc +size 13220 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba16a82ca1f4635e88d2e4ed2ae2b257190d1067 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0380255d708e3a526c364f46fe4cde717c9a0368f3dd07673c5b8e30a8470d1f +size 32579 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63d5ac94d2816d85ca132f903c8d7872e2b71b67 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb246bdc5dc30d8931ebb21da3d6dbb36d883cd2c08bd4d38f18fb1d49d5fca6 +size 42593 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcafc265a6d4b4bfa226d883817e35524c2a26dd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5918af051b256faa520edc42a96d382491f241dc185f34c54b82b7cbadccb4fe +size 61539 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ba672529bf1080a733cae7f73f6e510c983cc34 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91d116e95c11896daf98bf71d5ebc6d041ba05e49bcf8bacd2c40d7d2100646e +size 27895 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20e59a82e4b3e2330374f2d19eca076d174d3086 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7d3b9903d993879747a4a80e7c5f0fca0adb5c70b39e8b293bb85127e967cd +size 14630 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e56bb02dffcce07c1a506724ec7bbc86c55ca24a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b68e1efbc9cec56c9cdeb71ada4d3cf8ddb4c827c24b78251bae5b5e75fe58d +size 45112 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8a0c5c55b566ee8a5deefb256ae58be71def80c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9ffe398163709d2f02513000037804258cd98c2ce9f833bdaed70e746b1742 +size 54227 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7356ed74c57031b3cbaa087be1e563f852b59309 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6013dc06aa1eeefa791706cf7bbcee07f39dd665c85ad38cd8245faf8143dc99 +size 74393 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..147288f3baa44d2a6cac4b6d9d76127f1a844fbe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f60f985d7a327c12dd19a072eebeb3da517dab49cb262cac7dec5bb188530b3 +size 35008 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44d64a235ae809c9490d43cb50de215a9ea48924 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d8d88f57ff3c73df5f425622cb45a86f6905a8778db0e4aee8ba85362de795 +size 39061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8890f201f16e74c6631511fa036b71f576245efa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b5a0275f95d9275bef9b5d068427e9b5c6476aba486666c236e0c2fc5f7bc3c +size 83304 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_336/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4df5060501361691ecc4f0eb578c2dbedf3d2173 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8f97fa81ef791c11ce3d58d886c49fc0614e6428e5dbfc092ec964272cc509 +size 20968 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1dc8d007b8ebe13ecc50e9511e4afa4e94eeecae --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df6c64079a0f2279d738fbd051482acd5676cf1f686224a7b15ec8a3f15173d +size 15845 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e70396b3e5e09e64da025abcd5be50c258c8bd91 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c172f3a1f00795029a5dedf285bdfc7f74f4e7faba6a1e82f83f693e10c4629 +size 74826 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85186743be6986a78441fde92ee6147f68016cce --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a776b7f8c01a3c37f49938c925afdde99c72aab73ff628bab8720ca0746135 +size 26586 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b316803c5342f91284f0ce72b932b19f6baf8ce7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c26a6c667722dca3c93e9a1d9679bf4f75a41c50b052b46429efe3803b183b6 +size 43346 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d48897a7669b8b9032119247dbe443adb2b9109f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f2e022b382fcac411ce1484999743210c1237c776282704a86579e0658ebf6 +size 43572 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a78136cfc7c013cf186652b2aefcb4db11cb948a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abfd0607bf1851af4bfab597669a577486f4c7295a1700504eea36318d3e372f +size 63694 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de54fcf86dd11617e0c240228902b24ede2d453e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e537e70de4afcf52bfed74092974e515f9fbd9d7fdfb06738b3c59b0a1bd650 +size 70998 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3e7e74d77796e6d70987ac0bb77f7c80058ce9f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ace20848bdfb8a2267dde440d4bce851079de825b04cc48e301f79f9e0c222 +size 36498 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df284cdcebab860f37abb2d9ddbdabb8d88d511c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398be3792d03121de50abf65632c78c36d88dfff589cdb4ca93f32ce8ede09c8 +size 47544 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5de1e43516ee6d185cab48e674ca9ca2d184415f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2578b742d8a183ed22a7b4ed733e551f14bf79895ae977ecc773775ba5dd7bb8 +size 64749 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5700852b0e67e1f5824c7b40c4c68258d137fb5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba173a4792e8c5e2ab89bba6f4d65eee5ae824236b112d5189716e287e8c8885 +size 33185 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6db181b1b5a0506742591f88b61d8e51906cd241 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ddf5584cf8c6f6e56c0f7fe0e7398afc98dd4e34a997e98fce7abb0e7143f6 +size 35028 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..828113d774cc57e32ac29175f19591bf566af69c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87021df642a2209b0894f785c4948b7498cd241471cc4bc094b77c42cf3e1943 +size 34568 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..952fa523c7755704ca35b10b8ece7a4525d8fbc9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1836c74d2fce8998994d5562a3bff3e8c8d2d3a8c3277da4102a51a043fe8174 +size 18806 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ffb7faf1dd9113455df74b2e0b855de8fa28aa3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:658d800dbd944d8637d88aa2f3fbe9a2bc92bac6a14ea803e7c9f992f3b5737d +size 20739 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c802e3a00110f4ab05e1bb09a3bf78a03e9ab0d6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181b5a132691fa600a66b0705195e0cdaab301f6a9a843f0022354cbaf18593e +size 13443 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b0b80b16de4a384163b2e26c05e2db2adde2337 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851aeabe4239bd3252fdee6559110eff54e86daba0163526197d829221b19b09 +size 31654 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd7514d88c46ad9549a21a8de4bcd3cc96f2a242 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ff86a6d6bfb07b5ac671c66aca52e785cbb97fbe8ad1750200c024130eaaac +size 42545 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db537a840b63c441f40ce2a27ec740182706dc24 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:679b1e014582339a335c5c3827c1cc6345b3f03dca9841b5d8aa22976b43f839 +size 60916 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d942a7ffffc935155356b3148bd2299f777f30b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ef7cf2ea567b9133dac73845bfb429b4f600f2555c1be1be4c564f22a08156 +size 27946 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1408e7fbf4c21c55a273ae8365b928e4ce06706b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d98c68fe7748b37b0a4803e569bc3d795d11ebb913e07baf5389879213dcb2c +size 14644 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..168462f65303f88d46fbfefa70917980d42013bb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6ca8ccfd6ae3fe755f09594dd520e9b9e3a99639eaef81e3c0cc01756dfe5e6 +size 45193 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed7183e11d0b972a21595045ff68f82a5cadd9a3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7021a970c7beea05bef0c50b647ae6ab5eccda31468714a651af583356f7861c +size 54191 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8830903a34afffee2d3db3617696c0ebc14cebbd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c015d7e114ec427008512487da45054e652f6e0850f4af843bdec68eb9426e9a +size 74444 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df966e91a5e80c73a43048dc097cf8b58b08de2d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79c5922b045c0d6956a481c4e28587e2c7d7f70e16e6ae0670995c03ce995fe +size 34983 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b259ed1c826f1428bf60531c361b13874a20a1d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3234a25cefe3142f269363041f982a9cef7f0a98887dbb858e8f938f0be084 +size 39061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c420c919c8709835ec5652b4993911d0f8012add --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df3ebb91b7c05186327e86afeaad5c97ce1d1e631fdc32fac4df86a4c12d3b6 +size 86889 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_339/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbb20c66e74713556b1202eee6a05018f51ac9ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:571bed769ef24ebb26aa9ad7d495104489bb5a23a19455cebb6b85d1a9f30401 +size 20987 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e468b55d696d2a13c5173a1f1b5562f1ea0da57 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9715f3bde07abc6bdb978b332dff5289418d2e1a090d12088e459480415fbbc5 +size 15917 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9af9f3526eead89033b2d11e34f5f7cbb970125 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df48211ac77e319eab55433e9bf4279821370f06b20c43fe1e431c6ba09689a9 +size 75059 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b64067a8f8cfdee2c7899f3961424dcad73a9f5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79722dcd945ecdfd1cafd7fffa00152962509bb8a11fdb89641aaa1c6ff3a5c +size 26705 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df73ae057f977a6a029474dcb3d93f67e32156e8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af273b7e9e232e9f76e524ac4f1b3796c29d58048054c093808ae4e380f1ee2a +size 44535 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7544fba2912f5fdf94a94d075cf881f57862e85e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd925ee5eea9ba3dda271c0bf84bc73b289e1181b8cd39aa8da3c7b83c6d53b +size 44259 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03bcd123b2b7df23f6f6ce75a9f8ffaa4c29dbfb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9589671ecec1291c23be81a4a56f9f20cebdc044d6c047def48c522ee05871d1 +size 63885 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fbe415b5abeebde76f98b411ec7f4403ca0f9a3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a44bf48a4c0f98eb8f6d24bf24fb7b65b96a7f02821300a68793cd197dd951e4 +size 73633 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8e43547dea57fe605803f1843743b13dfce61e3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91cdae6364445a77634eaab3dc5e7f19b12a205f11c634ae40548551a22e1c2e +size 36375 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36256e81c91d26d7515ca23b1805c91a2e14411d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7eb1688259e0095da4b3cdec76ba2c0c21ff2429511592e603b7b12706e45f +size 47460 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f811d309938dcf3945eb95c4a72f8b717d955bdf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86755bfc2aa757033861a6313441ef9c3c0990f643a4579d0a69b47d3a541c34 +size 65799 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7be7ddc03d9da1c1dbfee86a16a80fe0f5868c72 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a6d82d112a4d19594fd71869cde5175ec828a444eb54f3a625ee13c97ea968c +size 33187 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da432e2c63defdc1d22439692ed3d4e9ed7c0517 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca57da56019722cc23ec3b620048a7964701c3b772740f605eac93636092172 +size 35293 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3b22525fcc6d18f29d4815376612eb7c6a9109e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05e96443e2b16b9e9229918668493b4f4d89af09dc33f02abf782ff936df25a0 +size 34591 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..785325a475f7c4e79ddbf1496e359ce673bda568 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd298e06982108390eb0f8ac6b43ffa14edb8c88e1e4bd6987d3b8adec3f452 +size 18896 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b6f9f20fa1230facf60492eee3afbb370f937eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3678aa4be99b6c38f6bbd21b56d87e5f5bba465db9cabf37cba3551ae59172e2 +size 20774 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e977a9fd954c419efc1586309dbd5384a42b0423 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021c00c4d224b3ea1d0a5ebdaee8eaf984dc522bbb7c3bd47e0340d8f8ee18e9 +size 13379 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eb8afb1b229eb14b7a1a146506d99e791a0064b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a86254f3f37db7e02543d289a8857231455954e0ec83b309ba3744c28453d4ba +size 32308 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af3379932323926965d54f3842e8a383cda93852 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68171596d203db87aeaa174d0ac219ac96b8dbee07a782fccc9426dd460da094 +size 42818 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77a4a51f5b062de7bc10d5542bc75a7781389391 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c714b16cc3da1b35548e97917d4697486db59981a99d29ceb44a23e23ce7ba +size 60685 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..615021e635b1bd0d20bfdc1ede2e4a5ff24561f5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c348e6d5d66aed62b80905270818cb37059dcd2007ae4c51f031169e7687ed +size 27447 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88f87b82f63d2080e0d0969aeff80c1af01341ca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c659f02708ccb8d1c5a634d5d4320937e8cf1e5c4f9f2b07981d0770b48e932 +size 14593 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a071eb1f5713ad4b350ca098afbceb66d4a1cddc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fae9cdd61b9f2c0907e08f5780c44fa0dadaedbc4694e1fbb43ffba189226fc +size 45064 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20e70827e79ac00075e5e00c765ebb53a5691070 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0b8e5023e4a05b485ca6c26be039186ac83f5b5eab553996c49b2b5e16399cb +size 54195 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68b34be32a2b493094c87ee0907016e248eb1378 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4313e2461657cd3ac9fb6f0d9c0eb19ccf6d82449a688ba55076589236053881 +size 74292 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30c764d1ba010da5607d33fd6a05905ca96f5ead --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b085159e2f6de19c8369e6d3995fb697f5939c6dcffbf916a298acc8f9133d7 +size 35013 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31c604d5b2ddad8147ab0f695aa5e090b8d1c9bf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e1efcfe9516f8b060d1c7bc12263cb479998cfd621232d520a603208595ff5 +size 39067 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c85d3646392758be1c8c77654b8adf68686dd1e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd60c5de91825a582a950b5c5da797cf6f6176c24f13e2a16456ad6b65777dbd +size 88251 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_342/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f8de93eefa96563e49b16e3a4a3d6b532f02304 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2328c9389de4186d40560c6ccb5dddfda9b16829175c7a658ea5a06aaa7be117 +size 21012 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3d6fd96fa4d2e76ebaaecb8de47e0960f786fdb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be5d52880093d910a39e294fef0dcc6712dead1069b2e781617ff4e4b311ec6c +size 16158 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b5e1d4cd21f16cee0b79acb50ce9dae238d820e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e26290ea55da5f88268f167d0d252eebf7de24e30b309da8e2dd8500431744d +size 74641 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..274057891aa9db0f704e8fcb417328e49377a719 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9856b933aad56fd5ab416f911b29bbfab17721c502124960e20afb02ec99dbd1 +size 26723 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4614888f9efa3967e120096ddbc90aecaa4499a4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b14ea347f637d505ee1bb0196d59d97576b5722c33dce7ef1e96de8eb1ef1e +size 44671 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbafa0099395cfaed38c09b3efd9eb29023d43a1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5d7193c6703ef2bf4ea7ed0f0e21978fde74239b43c918a7655d72320da04af +size 42522 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f76eb0d4a2e20f2c28d3cd2a0f1de2a36ce8a22 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a925053c5fb07268430734447a41debbfe36357036d6efd1547caa8ff01973 +size 63856 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c1152e4b69e2e10bfb7a317b6a3837549e65827 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f563494e0019a391417fb535955ab591d8ef3fbce7119c43f63308e126156710 +size 72798 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..705e115ce3728bd2e8fe61ab0b46521d8a1eb8e4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0853b6fe02e419314456fa6b5cd1a3721290f61ed4a170bba13d1641c8af8e3b +size 36465 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39b386ff67d496a878792db2d196f89dcb6e4c8c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b062dcbcde39c5eebe597de57ca221a2c41f387f47306a5cd86930a0bea001 +size 47622 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1ee5afa425700faa17b0b6f862cd461e46d29ff --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df4ac97159c03beeaadbc4bb5b2c66c23a0ac1bd9f914baa59dd28f9971f0c8 +size 64643 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da9fb242abd69511c4de351d9ab11672e61a801c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bcab9b3b279668c11c3907764b7e0f0f62f8884af46089d177ab06a7ed1abd4 +size 33268 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64bb737dfa8cfb2f53ba1e62014ec3480705c91c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e630f9be314cb9f8a2acee57babd5fff382712bcb6856694229d844d40c2783 +size 34676 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5517ee3286a791c902e4b4f6685a661c39027395 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:376bc57dd492b877f1a5e79a572b27c07f213dda3c48b23e6047d74202eccfd4 +size 34786 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2619c3d94dd01279a27bf97937b81b84feca556f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894253ddf5518bf95be23130986270106fe2bdb70943714aa61b37e04cbb57ac +size 19095 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa0822ddddf6164e406bc149a25420935c4cc3b7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e3880d7f26bcbda147f4ab242b9e12238738d6ee532f735db7a7c16fca8506d +size 20728 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..265b17ab69d66f873bf151ef6299cb45085d9ecb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac89613a1c7a532d9aba68cd383d4a4c0ecbcff64bfa37fbfbc32cd4b07d45e +size 13303 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2404078dba49774954052258e95d1adb6c2b42e4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f570302974afec515ef00a425df895e9f6aed18836c06d09d2ff8ad8b14f756e +size 31954 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a6903e3e8bc67cc17eb59e183290edd619144fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f61dcfc9d01f335e7d5575da93d1e43eee1e7f88c1a4c44e8e4f7bc3d8cdb1 +size 41976 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db00c491083fb4033e9769ba17af85b30b18c8e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65c50dd2b1fe4e7f16848396f2ae3f47c2d9c07033497cf6d0fdde2de99b7367 +size 61319 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4ea22b2bd30f1ede9a98c2be23987016e001e25 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f11b4863deb6668166ff55a2bcad0db03f951ece9b129de0eae4f9b048139e7 +size 27112 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7e8dfdd0ce871ac5ff042fad5bf01de9c77839e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3cab7a5f87689cce3ba5231e1eb7cb335124e7350a5ae09d2259e5fe9f1b2e0 +size 14668 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ab586e5b7a89724c4f3c7dcd6e77e6588166fba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c413cb466515ba95632932d37e784f686588f7f6ae95b50425c0c2297ddcd151 +size 45101 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2191cb4d4390de24e6f3a0339542c8f55683832 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b08a6cb3f7faccb86574f02a18baa0acfb5b7a04d3acaaec3ee818859d9f7e +size 54221 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..296f65eb7cb9c586ed8f1e2aadd48417253d751f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:295af7a5ce93ddcad2b0d96e7d0bce8e0800f7d4ee57ad195f7e22e4b59ca955 +size 74341 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebe225bbedc1e59b3bc993961e29054d3dc7c918 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e889826eeac134893cd3cdfb59b0b191d4d3e106a154db3bd44ca52eaf963ee5 +size 34989 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bda20013c0c085bb339174ebf81580200f0979a5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c24c953646f67f532072c5e829a97d7b7c8d7873485eb7237a4ce9e4c52083 +size 39060 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01d6d982c21c3c00a5c19d8445453b7f895b6566 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1c4351040197d3cb3bf3119399819c8f3f492710e3c76fcbe956932d16cc59 +size 86462 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_345/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5ef9a24b32726ba27d1881310cae20efbd6f027 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d98323cd03113ab7ab7eae896a192bad904eaa84d848c75f8f3a84e464f1965 +size 20956 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f846209b865c944aa2f69867c25089d9660643a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21dc284293b598aeab337e65576278c623d08cf7aaf940b7c4dd1b846d1a7347 +size 16523 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc4b9d8ef0e99e1514b81bb7617b03f5fd6fcda0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc0135778f78893d6d32f8dd50e455973e5c3af9c6dfa45d22d1d9e769fd083 +size 74700 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5791a76679f67c971ecfb9cfe2a969f576dddddb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832b0a5f1e1c7544e918646d2a165b0a4f8bd74208321849013f6a1239c2aa51 +size 26708 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8853b1ef5bac0e8e2efa6860cd53028fb29cc015 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178f331edd4464e1a68c5812630c6eefbb2f20cf96da424baae799744ce3e5e5 +size 43376 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9832a0dd0a070cfe2c198c3fed3df840e918a2c4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce994c0a610560968d63a811f74f34f4199cd9d04bce2f5146c02d3925cc0d4 +size 43798 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d8229d6cd4baf593724c014fcbcebdaa56fc12f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0358d2c110a2f0f695de92d57874c337fc0018512c851be77eb583ac06eabb70 +size 63557 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..982d1f86c8c00bb77d92b57525f56dfb23d5da75 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c80338c1867c153ea0da13fb52513feb7303e3fcbe1461b1265e92453f723c +size 70767 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..005ba3110443f88225e26567c710e1b2cbb2e056 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241c007a04c72f66a8feb20ce44fc058c396e024d0956f9e18853d3e1ff8d353 +size 36316 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05c593dde650ad247c50af3055af40ee126db4c7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:721d39b0f769eae4ec63a3de9293abc7ae1e8f13487dd0bff43059fb30684045 +size 47494 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..362e448cc1b86d1c884aeda70305c9cf197a433a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7278841172eb5bb7890e31bee390b26604305e18e838441440ffac9ac1a98e79 +size 67144 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b207fe4d90235a3392bbcc693fe6a93270700388 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4419d5b2396d3e1676c449b6460b4b2179881ce28f9fc12831f5cb8e71fcaf +size 33206 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74f5465f4127c8d0709968b8ad2ca16573eded30 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49194cd909b5098dccd6c4e79e1223b46ccfe6b7315804c84c47aea5182f94d +size 34944 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b01aa8c4e4829012a943e1619917bc4c216377bd --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803ccb1840d10f26efb45334107c62d3126ef160703f16b40c9388fb8a705a80 +size 34075 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e5198f53bce9353b5d652c0ed00324d1b54f422 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:705bea7f7c8a5ef18244c1731fde5eb6539a4f9a90c2a293990b422371471c2d +size 18870 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f9f69431f7c0ce3440fed497b029353fdc437ba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b6db9222906b6c18404f26c2f78db93e0d946954030b0ab1dd69b009945f2a +size 20792 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1bc386aacda5a73c06c051616c796b73101bc47 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:252fe12ade9afde3b723ea484eda1332a835e3afbc7b70d4b05356d4475a0307 +size 13514 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af50f46b0dd1c461ee4e2d0516fb3a80d05ef1eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19fbc3fc35261f9b1c29631def163b64ccb95f53e64a13c8797c5af9cdfadd21 +size 32090 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a8d741a91f67e50a4b2b55da13e402230cd9ea1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f795ce22aaa2790329aa78f1e684881aafc5a600e7266440ef8fa08aca3004d +size 42857 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd1aa40c6b69e6befd774aabff28b012de001ea2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ecf0fb87e649340ee4e2c031c7271d8e0e830f92db856666bf114f15508ca1 +size 60385 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c3f42aa257085bd71dd9fadaa39614685d0d3bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:258a919d48fc516f3688af3d5596232ff668d7c90e5f0b47e2e5a7a652869613 +size 27370 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59387a613f7fc0bb979bbe4a07ce2e70edd95b5e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bbcc19545b1009bab6d8d7163f339267fd7a0c2c27aca082cf5bd972744c099 +size 14727 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1809fed126e3fc77b98e13658efd4e1c902c64b6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8ac4501cddca02e2a88d5ff9170979755c112f41f5b02410f14d24f87dc205 +size 45088 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77f2034cd5177f67cbd0fe63ca3cf34966cbbfbc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473a5040a7086d628a4384a39dc80c9b627026781117ad1244cd2d0c71ecf706 +size 54233 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..747ee9abfefb9a6122dad364f89cd1bd3cc69084 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e0ce1af07a019efdd7493ad3d1115c72f7069821a7fcc49c9e90ced11108ba +size 74389 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0e18e1a5ba61e841d62c028f9abad89c76da03c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac91ddd5e423b3ba0334be3382fae17403466561a01007da758db1a0fdbfb089 +size 35014 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b59e1467a1caf3fa43a2632be5fcb82c2b35eec8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347ea7f1670ccd69f53e29bb7e6855b0e3da4c3fed23be0356a8c848c780be5d +size 39061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a40571d77021ffa2c6a84621a493457bc1a82f4d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b0c714fbbf8615f1e42d18722a73a1d7225f6afd4b1b96556279a1eb17c1a2 +size 87329 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_348/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..036345892f6b9a351e52fc64d5713d4bcd95bb1b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde429e467ac398eb1bdb17ac610a794e912156a3c5c4a5a2c86a30cb3593682 +size 20981 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48cb95126c491632951dc2f054f460636eaa8ad1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64688159db35fb535a0c07bc80957f38b6024300485fdedfe9a8c5a7af164506 +size 16483 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b36e56a85f5276bd3faa8335f1c791a7a06a1eb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77393e7bfa3059977dfe2b2d0ac6448efa71164881d9ff3f2df041d76be84e22 +size 74865 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15cc80d41e73505959973b4ad1fb2d87dfa1f457 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de09241b43f94b83b9f9d65a05dc38a67084f5d99e4ae31411cae494c783fa67 +size 26547 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7db6b209bdbf5f43293e0b4c04d59bfcb997409 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7479133817390f0d613d4857f1e5d6067fe697ac5b4cc6e2555a9fb8b49f473c +size 44087 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e172ddb01e4914f0ca7abb7a5f46646bbe1da8d9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68fbe7658b6d32a7129d708f7af73c8d83967c9a09acaea552aacd4dfc40ca00 +size 43501 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..855201f00a98d59b12312013a32c4cb9fde0ef0c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd0bcfdf63f09109f2719936a10a4b02f6633f9c66a4d23afcfbdd0371b817d +size 64554 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00f6791e3778ee28fcf6b080104a3c565af38a7d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452a5ef2a69f05e69f617943a0cf93e1609216930cdfe957881ca96558edbed6 +size 71169 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7059918c8df775a0302c1addcdbba3ea433146b0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7035adcda16c5f2084392b2c7bb48fdd7c5d7820cc3af803e0611c3df6314e0c +size 36507 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea092f1e175aefad498b0e68fdce71a4649069c8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1bcbc33fca3fdb03d5405eb1919bc265ead58b83396dc27a59eb757ab49ebce +size 47487 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a47de60ddfce2549630a574392878cf969ea423 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451054d5f2924a866f98fd6bf2ab584f392eb5e43aa5dfea0a7dcdcfa17ddfdc +size 63708 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16a57061fa0fdb9af8c8a2866e7dce9d86bc3c08 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9887d26a1b7b896e100ac7468cb4b26aab772163b9e166e7078ef55b9d82b1 +size 33017 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bce74cced9e05b65d548117874aa1ef2d222a554 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503c3e37418334f1b26715ecec355a55a7bc86abb3f01426c2152afdfb1409cc +size 35412 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a17bd4fd662ef16ac2bfdcb603411de2b0f0d73 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a7f4ecd40e97487d2d315ef8ee50f3a738a883af4da341caf3d94ce9d84066 +size 32878 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c678ba2f612bd2a58f223bb274a6cb2380788b28 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4048195efa5f63044ae953c563036972c2d950b2a61aa23264937bca48d6d7ac +size 18971 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32c6a35c424b90a6491d04e9ad1566f1f7b71aca --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6358517a3657d09e942fa336d9705713533bdb7390313a2d328e934f5fe86750 +size 20790 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57e00cc6e8455fe0c98d0223bfb75058ce741810 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cab7e935fa9dc21c4d78ea436d4d68eca7b25ebe109dc18c3bab1789c6a2420 +size 13301 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10a4f70c13631dedfd5285f52cb53eae4e3e39e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11e9f6e203e0a5c446111d3c8c3706cf32ba97898024c534aaa295144438d28d +size 33214 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0500f0f6970704cdc87df27556cc0fb7d99dfd70 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26176e58902aa6595484d7a3ba7fed9cc3661a914eaa6392f69cee1ca3dad15 +size 43364 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9371d5871a9e64ba4f2add20e2a9e2e1acde04a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4526a6d3396c8c5d38831b266cb61294980da032ff61ff3441a5c210129a6d +size 60270 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de6b0667ee1ce4ddfe1af61e5b11773b3fdf3573 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df55bf08d584534048bb08cd9625d2f787d56fb84b20ad3036d3694ad7f3fb8 +size 28126 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d2cfab471241ad441b95d01a7f61e5d0d7b919e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7529bdea1eed54177923cd3a0b5ea1ea49f58b97a379a43da9bcbced73d02f +size 14661 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05e8948fd4e54e7ac6e1e027497b12dddc720596 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5813ba6c2d20475d165eed357455d409f4d8cad2c8465d730ecffac4128191 +size 45055 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43edd33e00f8ed6cad1ee58a298ab08081734c15 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ad1f08f4713d84ec89182bf8a586e50e6289a8cd5c2af7aa340878c6ac6efca +size 54239 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b537cb6d6a0ae4780b4a200e7029967e2ee32bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5160c9e3df215dc8131647e16bd488415498d5f07bf910180f8c96bc9d22d5f1 +size 74402 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a58f3b85dfa036c424e8aee98f5ee4e8a8d0c8a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b165ae48f71d08733fde830374c73c8238e3e79f393450f2a7052cb322ab56 +size 34975 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2c8a2640def69e195e210cf6b87e527da1bb381 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ebd4f8186714ed15650e800a391480b2df49e289b0bc1f532c6fb4964775d2 +size 39057 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e62b8e60a57e1c07c9c6f9e02cdd8817311464f4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286ded94ad1e42b2959e1d68b5abc0f48e7ac37a1237306e8347edea582faab9 +size 87708 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_351/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..baf4a8858c318c79d17e0874140d5503b150a4ec --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb70be3b5b4e6d68ff47c9ffae93620f15c38a0005bbdb9a3be56a0af1b5315b +size 20977 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e55802715087aedee6e5070090042752a487710 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eec1771912624faf50c2b1fa6cc1acd9d18c31e1c08d36f1cdb67b4bf46bcfbe +size 15981 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e434c8b6e72e255b6f364430f31ed29d72b2a1c9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32a927fc4a43d3ae2791ecb1f0d748e78f12c5f19f173766b2673740e2007d87 +size 75063 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef80e046ec6b8fe5ed86a8ecf02839dc60d0337d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bf541ad86c6fd738e91dc87e02b21aebcc0cb892d95fb5aa501f64f39426a3e +size 26599 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14bd06d1059616d62efc9216f7c07f58c5cec022 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e093a31aec9d6c32ce96dfbac25baa11849220ff6f9096b69633833e2336c8 +size 43611 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1ca77c30ce06c444d1854a993ae0d23faab89b5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22391dd615100e69680486aa8621d4c7e2c7731e3cf14e476ce7d54df22df9ab +size 44135 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a0fb8c0fa76c0bbc4ca0a4b9b11383ade27d296 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a56b282c613fa216fd9babff9df0cb0b57a146ee5c6bcde49e71cc82159221f +size 63764 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..947b59c1ffde6341b1de528309bebb5babfdaf61 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3859b318c50b7977b87e48d6d01c3aec7e468a6351075a7cc2aeee4bfd6f5d56 +size 68332 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82e249031425f175ec769f3b77ddc751f5897c44 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd391ada72aca4dd1f1dee15f528d5eae4e51dab39af29d81a836b643343d31 +size 36193 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d1386f0437d0a510845124f6e0aaf600ee69c85 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69f51e741d919435b7a9b11905a03ccb79a8ea671fc01444f2869c5ce684247 +size 47718 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d36208413bb10f67d002ba3cf3a0f1caa2baf406 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd900fbd03bb7ac0990395ed6d9456d4dd704db6e296ca4cffe41d555764009 +size 64010 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e369181fef9719734007f4381673f3b7f132fe64 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03dc898c5b0070abf9bb3550a17ac1a0090ed2ba47d1571080435ad22814b39e +size 32949 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2050293f45b284d61c9bd19b581e7cf2e4fa61f4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a997a5dccbb549c1e585f7c5bb247744304a1ef61074e482e379b45e451ffd01 +size 34672 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91cf88717719e95b402b992120ac722bc05a8c64 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df71be0678219e99eb3afb6ea6ae07b294bdc7ee706151ed62a9ab4206d711c8 +size 34554 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a3af11de1a266a3f0560f0a5e64728a79df3c8a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b433a093d23d83aaaade8a2193e1fa925a00acb8fe2a2bec508bce1748d4db8 +size 19005 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..420a1d0a2b56ac51a859c0e592ee66c17b12f5af --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5483de27e384f75aea7320900b3c8c391f054be0e680d3d68d2b875142edc03 +size 20833 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32d710316551d4b080175dc8ac7d482b003873d0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5192f1f186914c06f18a394cd4d2fc8919e05ed3e153d8492881c857312f622 +size 13417 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ec02d95f3f2783ca7dbd36e0d2d166d16f309aa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f4e478cf4504506cae52d42a3f7813ea9eaf4ef7c1238da876eb79e9c21605 +size 32790 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f82b0491e6b85885f5de728a203afd3b691bb7fa --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e0d3f23b65995a00fd27fba9cfcfb8846f6575e2b4b4337e26147ab6843c74 +size 42923 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7514de90b52517a46c9b7731f64cd3922815629 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d9b71a9ca36a5f0bf8e9fd95443388cdf19c9f6177af7daee4f4e8ea54538ae +size 61381 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..babc71bc5a29f869491731474d4c24707daa93c5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8deb8151fb637ae4bb89cae68febb90601f29b06449756db1ef7ed2d75a44cc +size 27871 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c317a15634008a6785d63fd27b7da678dab240d4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9cbc4c6fc116162aa230966665f99115b8efd615895d4a48f2ba9a373553b66 +size 14683 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ab7dac2668001f78ca7fc48b24578bc9a03ed71 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959f941d2c4151fce3895d7ca655d309eddd01416478d35314abe04b91ceeccc +size 45043 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6965d54b4995737ea72e8771ca6b6acfca4069f0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e01348c5e54eb60779d06b022cebf0a630cbf4aff27cdde98e9c29d3302d9f8 +size 54187 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bf5192ea83af590aa5a5102ba0cc61c00dfcf90 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ad047ef4f63ca5e682c33e600c009dd8ab678b66753c1e60743add540f1b04 +size 74293 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3e51887d49ebdf354ec15e5cfd09557e116a9d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55699d3ea87326f776f9bd856af62cec63efa8eb57e9733c6c7ab69f857fac9a +size 35011 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a23c16d522ed130e98e9931ef258363058faf0cb --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98332d26e6271a1a795474e37e4358771f1ff8ce04628a76f373aad38ba467af +size 39075 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d9cb9fca02463f97b752963ffbe7431223bdfe9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ff4b1451211bd90c57da87c498b5ce71272e4382cfa7bd04d7078ca140397c +size 88627 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_354/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff35d0ff1ae0b5c01960821eaefce26ea86bb989 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e475843a20e0ad9afbc4190706dc5f5a74d800a68afa2bd811dcbbefbd376c +size 20997 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0638fa546d51e08c740ed217387f4c983d53f9e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7569c5f1228e7272405005990c97c1ded14ccef6dd05e2199a50d8085e1d56f +size 15825 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b011461072ad2d5f98957ab265640b209c03f96d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:744046287771234492bb55d787779127b98b1ad8289db3d1ca6ff4601e4df092 +size 75887 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22dbe3549be2825a980d7f1f797264bba6614d99 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a330a4555baa799541507f7f883cd1a56ca823a06067c9d933e58c63ac5ec93 +size 26649 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fad779419413cc4199216d3f1cfc09f8c2a1f04 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f1f320393f1dddcae0f4652d2db39dd0f9bc43a44ab7961d69ea3ae1e6eb5e +size 43291 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..507b975ae84470485da9b8d7d495c99217425c82 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cfa6b2a1a7b77a78af4d786f30f5ae99e277005cefb893deeb11b9dc773c4b +size 43108 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddf91bde8f5da42341f920dacb54b593882a9760 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:586a0acee7b7c8eafdc298464c661d3a3c8b1895f31272701be3b874373e18b7 +size 63702 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04edb7dc724ceb3daaa248f4f9c661dde4744847 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed582984762077c5bd65179af79e764af05c6d08c88c9f240f647a8fb396a2e +size 72945 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1745419057eee1981a7685b4a61ea85f2a178c8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37c515d2708c7d32479c4129bae81b32c6aa5bc930cefcfc5f337ee1cf47c1d1 +size 36464 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36f17df00fc87156509d38386bf1f5ddef09b503 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2930ab2884a0c3cc325098e0be675bf695bc8a319d12706a3335168fec4197dd +size 47320 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2292e9e06eced675805f2238237f950aa2d6ac3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d1eed2d507e72003c07833f70020cb76a42ac7c7c9770c13df9a37ba590485 +size 64515 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0dc4d5040f4a0de7df1bc160707b4aa975eba4e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993714eac8f7841d4a2c884289ab2895eaf92ef2a46f3ec7afde0ef8c501e61c +size 33108 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92aae5a76fb3840236fb9415f2cc2570d071dcbe --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4348a16da5b3738bc361affebf66699ce6bb8fecc56b0587c56341cb523b4d3e +size 34710 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6928f5a490c5bb306a02b83641a156b7bbb073e9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3662bee06c6c29156f9a4e95c0f7e2a0152d00578ca6acf33e71ba27e80dba +size 34467 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9353ab9a17ee82ad8d41da820aa38479260e9e1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5f6ccfb13cc17d1e54108c85f99c4724ebdc47c3328ebccc9623ffb4ba1057 +size 18821 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dbf05e7362b009d425f58bd7f4a311e5115c0e7 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1162b3905cefef68d03dd85136e36ad60651bab2a7b1575f5ce15603d91171 +size 20738 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4118199ddc1c0251f8e6167c4e5af3c5768b99f8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe83efcb1ccc82148fe455baddb1da5e084552151d9d47dd511b81a43957861 +size 13301 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd2f49fccc9fc33c62b728131c1536900240a3f9 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89307204aa49f72cf3f9ab6cfce209c5a1fe741e9522b70769e9976fa99e6662 +size 32272 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af91f970127f8e00e7cf9bcd0924cb4750d8bf3b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:962b71f5584a3ff4ee17172aaa8908b05a6d00bdb3e545f2817bf35c7703e092 +size 42387 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f3a7776532497d5744af7eda9140edd6ee19115 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45454042cdf0d84e165de192fd8219be66e7a60e71e72d8ead0007a40ca7d06 +size 60315 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2711522459bb9bc87732e6c644ecfa8ce585486d --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b9581462bab3c1a42f6671039b32993024ce95fc9ab4aed3e5b367b833bbd2 +size 28297 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f055365b98e5eb9ef9d6c31ef0cc4856b2a1b829 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226e20f5188ab4a86751f5a1d9c0bf1c5a86e27d1bbb22bad284dc3c33da1164 +size 14598 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e944eb90482a9c1c0127226ec640d0d100349f5 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135ce43a47de652e5c71333f2ae2ca8af9fa5ffd531d1f2e70f982ba0666ec89 +size 45055 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52c70005f41972dcddd1e6449ad52b6b0a68377c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d0349a19fae1c980d479ab82c490615fc7063922c31d484d2084a5864a1bbc +size 54252 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d08d149511c94fd1e0780db37ca24d2524eb34ba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60000abb47edd37e91d7944a7620ed171f41ff5d9ff72f014220e000fef4b7f8 +size 74304 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a566bfcfc166f6280a2f408d0fb0ed9e874003a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:749fc5ec75efd8e6173bcd4b6c413c9d1779a494f02d5ea96e3c39bf5e083315 +size 35027 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4eab24384ab31dd6087749450d6f4e4ec72b6ced --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a632ad4e7c634626eaed52a1e6b63086f57f20154c6a2dc29d52d728f432901 +size 39075 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e834c5327a2a2a9aed2bb63f701b0ad261956d47 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031bb49932d2b36f5c2a78b57c1b6337fa8ae3e76919c95160da7bac2b44177d +size 86537 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_357/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eed989fbdd3076a502b72377c600c540e67097f --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a1b4bc41ec2219e5148ff5e9e58c1f50a739b2a1525b015e81e2db5bebb59c +size 21004 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63ef58396dc047f8a867f88412edb0a596f8e7a6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_boolean_expressions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20542b23740877c86fcc472703a3893ee1478f07978bff4ed031569be9606b62 +size 16061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cb3deede3238b1eb1f6067072c2070fab7d4da4 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_causal_judgement.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b8ff4a086a3844d9db6a8b789fd2c4d415e4f32e720a036b60879073046e3fe +size 75569 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_date_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_date_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..381acdcf3db18697048fc8aca826c232eef23b7b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_date_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9992d1b543b1492a0ee23d876a4abb6eb9aa5be7ba98b9960399f6d37cc6af77 +size 26673 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71fc830f71089a4188028502d45c24541d96a5bc --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_disambiguation_qa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95969f48c5199c4229215b8c7347d1254f5090b2a232727ae4611795c2aa3ce +size 43369 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cfffd611522e41099eb306690b440c257b7551b --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_dyck_languages.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cffbee1d7f61b08e9ff7b09399501409280c13b2c8dbcc10b1b4c653a60a1ad4 +size 42537 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bee076840e12d13c6a76a9aea3d489abecfca5a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_formal_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70aab12fb825df85b80bb600047907097021cebc523389c7665a260148fd514e +size 64002 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0483ebe691811a32293243aca5aade0656ab0332 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_geometric_shapes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad5ff7cddf80826137581d714d106e8c502770e660137cd132973054192fdec5 +size 71133 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f04255a6e9eeca58a7c7376fadae42fc8004ba2a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_hyperbaton.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfba491d9674170ecbe976a35e9fcb451449e7b090e0ef845d9b95867b3473c3 +size 36366 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1071eb4177bd038df3b34a9b7abf04728f60d8c0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8849c247ad480296a6f871cd768b77a92de3084d108997412a30fc6f3167c3f6 +size 47410 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f31a23d0bb48dcd51d46d26bc0642c81dcfe2ec0 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5062ee0adb0c55b55c24c550c83fcf9164246a64c26bb3c0914d154852304d6f +size 63894 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21db753bd7106946973c3d522c8d0dd4eee6fa73 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_logical_deduction_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd86dc438a4e9bc6587170d37b7f3bd29c9bf4a5e7e3dcbfa9146f1fb6fe02a +size 33165 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97e46cd50d8f400de2fd3d8ca5a84044f3b621ab --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_movie_recommendation.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d5a4a0e6972ae697b30af9ff3739d55c2aa37ea05b37e82fb8b8e04785d9234 +size 35066 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edf9a7c082791797d5b34360d9c6050b065d6ae3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_multistep_arithmetic_two.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b78bc9557c782cbe45849789449f4e78fd93ccb8e8f62b1eacabe755847b499b +size 34842 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_navigate.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_navigate.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..847a7adabc538e7e7cc7184cb982e5edc667649c --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_navigate.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c7f5645b8737df20194c1e94c51862882e03e5db8ce605f2c5bc16c28d5a9f +size 18944 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_object_counting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_object_counting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15b0fa360ff9ab5232fcd15eaad035bd613449b3 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_object_counting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e9dd52489a79f2d7d69dac624bd30527049c531c0a4c28f4197899d9ceed4f2 +size 20728 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f8f967afdf02bd76276df1b21836f136b1a2ae8 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_penguins_in_a_table.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9961121f4214cd2edf20b0c54c80688e4bafa0fe36da6a53c806794eadca84d2 +size 13324 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7feb665331f4f246f471ce631811e73659df2fba --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_reasoning_about_colored_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff4f2fc60beddc8675cf25aec54e4c3914032e4a22c574040b19a945e1deff4 +size 32203 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_ruin_names.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_ruin_names.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93f8d920c18450bf2ffbc7159f8e5808443beb54 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_ruin_names.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5492a3bbe78d549c50ac61fcad934214364ba11314518a5110fdc18ad543dd36 +size 43519 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2091431d3e75f2a72639fbfae3c95756d27a27ac --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_salient_translation_error_detection.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca497fea12b4001d99f539c67c0e5820324f90c9158a77b0b9fad158fe196f1 +size 60543 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_snarks.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_snarks.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14d01561a2eb0e9de0d403ee7ccaf67639ad11d2 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_snarks.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b76c81837c411639bb337f438f218d02d1120023337adfd3b51692277cef25ee +size 28074 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2118fc38f8cf96cc16ed9f3a8bbf1d16f1d16ef1 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_sports_understanding.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:648565e5830d799e13d073d91083f307d33af7131c43e35d3248434ad2f4b5fe +size 14669 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dedc7301234ce395287db7a1f6df6780c538dbf6 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_temporal_sequences.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f480892cb07b1998f5e1143fb661a93d2d19ec917c097d5c128239c3badc42 +size 45051 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c4e1f833bef6783133b98cc5ecddbd4ec840905 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_five_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:443abcf4168c9bf215ff1fbd9e3a76b7f0886bf98e2f6c12e2d3c49ab2d72fb2 +size 54280 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3412ea3c04c8a3cbcb2bbdc5be66ccb8b7b2cd4e --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_seven_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74768ac5ce9d632d46c2170ced2fc2aadebc780c70c492dba1fc2cdc6439318 +size 74329 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe2db990707e534a5780f01aa4d37de1adf4562a --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_tracking_shuffled_objects_three_objects.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b80d5faf43011b853c04992e5c89bdd463dd04c26e50900c03bb2336f26913 +size 34848 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab42cfe1b56784f5b9a7a8548b7a73873f1b6a53 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_web_of_lies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f223388e953b411ee79cfb26af4e2d533bff69c1af15ea9e8cb74434531d2a +size 39061 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_word_sorting.jsonl.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_word_sorting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..372f15870d44b6f1a0fd066ed1cf1089bff2c256 --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/bbh_cot_fewshot_word_sorting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b73829791dba579fb4c18036ad3fd3ce788962156c8729cc625fda07cff9509 +size 87716 diff --git a/eval-results/bbh_cot_fewshot/0/ckpt_360/results.json.tar.gz b/eval-results/bbh_cot_fewshot/0/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..063b782bd702fb62fc5454e9a7a4d64ab391dbaf --- /dev/null +++ b/eval-results/bbh_cot_fewshot/0/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df26cbc52b4c160b63ad587d7f8b859ca43bee2c6f454eb23cd897b912485e7d +size 20985 diff --git a/eval-results/gsm8k/5/ckpt_003/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_003/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a18e2fbe17fbdf7e0f0a57e54910110a79a65a2 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_003/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09e1db00c5cdc8a57fe9dd252a3d371a00a1ef4e38ef52e284b3b6cfb9959bc +size 3033859 diff --git a/eval-results/gsm8k/5/ckpt_003/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f1a9341d384afdb00fb8eeb11b4cd8f47aadd45 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d43eeeb2167c021030e3cfbd78a0332df07c539ec00469f278b8b5080cafd5a9 +size 2916 diff --git a/eval-results/gsm8k/5/ckpt_006/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_006/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e09d369a6b3c29530c2ade33677337ad64d5712c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_006/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa32f5e656f28f1b34a0a646328f38021e78b0c3bc44bd367d9e60948a19405 +size 3082163 diff --git a/eval-results/gsm8k/5/ckpt_006/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba8576e9af7c8c72d0244ea0bd1192ac0ad8c58e --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3df8c7b310887b6b381de37f54db82177591b68b064c695804a997070c010fa +size 2923 diff --git a/eval-results/gsm8k/5/ckpt_009/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_009/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5782cf3eff9df091fbe29a50693b04bdb6d2bdf0 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_009/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4951d86d0692b8944b74ed7f3d5562e813f69b13f280ab6fed9526233d520ac +size 3093075 diff --git a/eval-results/gsm8k/5/ckpt_009/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..699e8c89855298a6cd13d1bb1872ba18c9e772b1 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33867d6211f658ee616c5ef9f317f9f38209cb99aa06ed81f0e6b513ac4f875 +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_012/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_012/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcd05193027aa3f67761a672f18307d828388bfd --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_012/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10fb50dd75c830656c26593b92302c887df0781f2c545529ffe7c2efea7ca961 +size 3107297 diff --git a/eval-results/gsm8k/5/ckpt_012/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88851f533a6e6f7a11fbcf28f75c31a597e270c2 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e586071abbcf3c4d64f74817097a1621d8db2fd9bbdcde0ad2b4700b2a5e7877 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_015/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_015/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..127558d12077bc4209ea56839cfb1cda927e02e6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_015/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc87c722e5f0c8f904c9d8367a8eb1464a27ec27cbc3604d6509a39d7ddf672c +size 3120576 diff --git a/eval-results/gsm8k/5/ckpt_015/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43eaddadf7208fc6cbd88fe7ddba564bb1e54196 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84392278c991654e08ad39dd1ccf1d4ea5775375d2f45aee8ba9802ad1375f1a +size 2923 diff --git a/eval-results/gsm8k/5/ckpt_018/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_018/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1162bea5284457a59371515de256c84b05595288 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_018/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d2f8e29481fe4bf794233bfd39b56d18a14e603c88ebb4d94d2223c1e6467b5 +size 3118585 diff --git a/eval-results/gsm8k/5/ckpt_018/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71e3c8e8172fbe6da6521c2e4f5c47be59c3fb53 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b625fc39f5c8d401adeab9db630bef55007f47d6ca00fd4a5b812b36b3d22859 +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_021/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_021/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b872225c3a274faebb93779e19b9947961dede84 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_021/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559cc690aa26144a4a625d6f1da999b2d737db7fe69cfb27100157e3bd832f97 +size 3128520 diff --git a/eval-results/gsm8k/5/ckpt_021/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5923edf45ce0992c2bba953cb2bd505c5cee7a5 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39fa8bdd623242fa0b406ee855fb3f0b61d3a3a78ba2e3c86afffdbf3c7fbd93 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_024/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_024/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02b98220a78b598854c3ae8a16c91e1eb09e91c6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_024/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2185761cb72cce9c5a998eba74f873f2fa7757eeaa922eafd05e015b6f37421e +size 3123915 diff --git a/eval-results/gsm8k/5/ckpt_024/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc43b6b25e1ede450d4eea4ba847e0e7376bcbaf --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30316e11507ec6a0cb76da2a7ea38c68e32ceef1fc3c8ba59b83beddf04baf2 +size 2951 diff --git a/eval-results/gsm8k/5/ckpt_027/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_027/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9398c66a8311590b85cb93fc47278246e672f80 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_027/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de50dfeac9df8e1514f9ad8328064cdafbe3b07103740e2f229bf5b38cc0c68 +size 3113163 diff --git a/eval-results/gsm8k/5/ckpt_027/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2ca7884ac799d81592b4e09a2d61ad80e1c5727 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38ab1c511699f5d49ff1f4e616ce11b635f1a86fc17c50f23022f3807007292 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_030/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_030/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b57fd1f8aae87cfa137db20d59ef6346b99b1cc --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_030/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c5e22242f5a8a04e3bb604c2f4ebc45dfc6861a1197f46b017c3ae7bcbcbcb5 +size 3130172 diff --git a/eval-results/gsm8k/5/ckpt_030/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbcd90a87c99fb0895833f579063f40ee4a644a0 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793c60c0e77ab3e9d57c3a7543827da772e8fdd8852c8c2510351a7a6b771c28 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_033/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_033/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43aff2c27ed700dacb8da24069b3ceb603b79a15 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_033/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e81fd6f9642b4f4bb880d425e4351779c515647310fc07449e30a5335c258ab +size 3122744 diff --git a/eval-results/gsm8k/5/ckpt_033/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b0f46d7efe1781bcff64b36192a208f2c14c23c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b073bd61bd51087ea59b7ff4cae5b9f03999414278d6a95e13b0a54d0ecbb64 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_036/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_036/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..286a2d78e27d353f41c8755035422383e2569fd4 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_036/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d283ecbe15c97f67ae97bf9988727292ed7bc93d2d8532b27652f6a5a88651e +size 3124285 diff --git a/eval-results/gsm8k/5/ckpt_036/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f306bc72d31a537bfc4f4542a4000c1c8523b12 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8926dae53fbf4d317c6c49b9d18e3b92c4c2caa727b8600336feec57181eb17e +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_039/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_039/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e7f7e87563d17500e93cd96c58934f166abe4e1 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_039/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b31d41eea31834bfe204b6645ebd3df2b3d746d98dd3507af6e1e7c379cb2c9 +size 3122384 diff --git a/eval-results/gsm8k/5/ckpt_039/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e32da788295026dceec34b4c39e787ce875a833 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4c8b0647e774784f9ac5b66c3a617c8faf9781e2ce0024faa001f5668d8ef6 +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_042/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_042/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e8f4d3ab1e7b6b923e375d508f80542f55d20c4 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_042/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c101c41549ef42335693125449c3bb6fd5f58297ca4aa8775feb84a2be40e4c4 +size 3130116 diff --git a/eval-results/gsm8k/5/ckpt_042/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73e9f88d8e2e7edbe554a4a0dafdaca572f79a0b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afdb0335c7c0db6c37256124c65fbdadbb49193b218e1e49661d8090cf4dc97f +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_045/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_045/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e9240c3d483775e94b6516db14bbe52b61bf876 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_045/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64aff1bef18de270406ad03da0a42da9b6a57639f2478e0012ace5a610d9c5cd +size 3133250 diff --git a/eval-results/gsm8k/5/ckpt_045/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba27e42bb62817a16c179c14e9f5a5899872a779 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14b9de272401c57705a862e1be8c850c6103294504d1074db11fa0c08169a34 +size 2917 diff --git a/eval-results/gsm8k/5/ckpt_048/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_048/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1b91840faf6f627afa1a0b7c1f2965339390ce8 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_048/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3dd07625d45bd7a45f1f20d359f577b7fa9c3bc1db9989a4b0ba47ce06ac65 +size 3131743 diff --git a/eval-results/gsm8k/5/ckpt_048/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ab9abee8b50be57d820e18ac10371ed2cfeb145 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c94fd751dfb7e7a0aa80c2999e3a6d11404e7c504c213a5ca0cbaa9fc93fc2 +size 2923 diff --git a/eval-results/gsm8k/5/ckpt_051/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_051/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a145673a7922f0c5ed377c944130202b235c110 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_051/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2491b61d6aa837b6ffa025984ce7f332d351462a22e8d4d8715d030cf1acc68 +size 3136338 diff --git a/eval-results/gsm8k/5/ckpt_051/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23a72ac7c4051c6b135008b7cb7a3fe85ebd2830 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec74268b841e8b8d7a9af274c7c2e1adab85f36fed66898942e42abf841095e5 +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_054/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_054/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02714bfaaf60365adf236f6894c0b51648186875 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_054/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0da11092b250a9cf6a7868298b8cbba0704042bb610ed637facaeb0787610a6 +size 3131769 diff --git a/eval-results/gsm8k/5/ckpt_054/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29a7e559b4ec470d70e60982deaa4d73ec1afc04 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a9d94e0463527bb4520edb5d98ec4cea923286e05daf8aa7dc30e9aa69d3971 +size 2918 diff --git a/eval-results/gsm8k/5/ckpt_057/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_057/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd2c8fcd93fb1bd8f2b471d05095b20aae72fa96 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_057/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4159f139af5202f7917d998a53ba7541fc1820314622a02a3e1f0377c11906 +size 3130834 diff --git a/eval-results/gsm8k/5/ckpt_057/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce86d557b3f75f9f086870a9b026c49713f1133a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79407f753597bea1d7f981f569f180735422ad0c23a1296374f97b44c0a50e3f +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_060/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_060/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85e0d37d7cda8e82a4d8fd9ec1ac8c1771406a37 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_060/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d4820223d33480538ea9982a66c67eeb271e061f3c9c0beff20301bb7bd6ff +size 3136252 diff --git a/eval-results/gsm8k/5/ckpt_060/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d9bb2cab091cbbc362758f37ade83bdd92e0786 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c688f798a6ea9fc1ea3f91c77a13f2851c826f8dbcd86928de657c660c222fc +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_063/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_063/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bf627b3784288ba96e90c2f4534238c93e468bc --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_063/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1f963cb766017739981f7b5245ee426756647554971481ee4975257eb5378d +size 3139815 diff --git a/eval-results/gsm8k/5/ckpt_063/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0969de57b9740d57ed21c56f4c6074ab0821fbba --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892416f0f0e51f69173adcf62daea68a3a2961632df446c8acccb60a66515f00 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_066/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_066/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85a8207627ede589285d77655ca4f6463ec7d384 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_066/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d23214a489b48850a15229d225718da3fb1878b9796ce8e4b164a69a15923d8d +size 3133192 diff --git a/eval-results/gsm8k/5/ckpt_066/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8048b8d86f2f3b1549a5945014ba7fcb3093a04b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927ab2467f06cb711ba0c97229aa2358c6bae766cc8c0e25dc472f3c207a806d +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_069/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_069/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c6017ba732f0d0fd141f4b49acd38e72b86f3da --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_069/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae3dda6871def861b90e956b4837c6af7c39ec9f49f0c29d516650ac3e07a6c +size 3141391 diff --git a/eval-results/gsm8k/5/ckpt_069/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a241243a1333629e1c9554faac51fe516b3a37bc --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7059b378f632fc78462781be5dcf11484559e655917d7318ba324e8da7f24e7 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_072/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_072/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14f0714ff5f39d07b8044abad588f22f17e45a10 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_072/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d0f5aa4d30bc2434531741722faa2f8743576cffeb04beaa29d5c3817dfcfb +size 3133073 diff --git a/eval-results/gsm8k/5/ckpt_072/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0412c71e6df23d64291610c3aac21cda665da0a5 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edab0c42529050c3a9df83f241f1b2c85d3e7e0a0d3ad4601a4de649a30e2e58 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_075/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_075/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..188680a66f7cfe9fd37504a36a74e8e2c1f967c0 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_075/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:122f16bfe3dcd893b956568316f95cfc3daf5285d9e3c4199cfff140189bb2ab +size 3134258 diff --git a/eval-results/gsm8k/5/ckpt_075/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..010e625f99c7c16e22c8a0a750a30058a6f9a642 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0cfb0576ce61cda825cd290d0e8b77d07de79cbed9547d62839873845404abe +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_078/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_078/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98391f837f2600124b0f77ea5e9ac5fef583fdfc --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_078/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bbc7d2713051d880a75403c3f1a7decac55c6c03cb693ae12f8532577fde71f +size 3139094 diff --git a/eval-results/gsm8k/5/ckpt_078/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5339d8e69ed002c11a694d2ea355d00b4cfc01f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32afdd4ca59377ac894c3f6d5694b789e0fb6ef2d01e1178833bb1101207f21e +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_081/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_081/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34b60f60db19fdeb5ded153f5e5976c6058162b6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_081/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124fcc587093429deff040fa4fe333962e967c35ecb4ad9ef082075fcbe55cd5 +size 3121776 diff --git a/eval-results/gsm8k/5/ckpt_081/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed9e290f71a0e353873942a94dde00eefea959d2 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fa141b94209e4218c143abb67f9904b1794bd92ca4879ecf28edc34abaecd9b +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_084/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_084/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8aafabc0a2ddfc38e55c69759541871f8ab4deda --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_084/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f894fc11ac725cbdd0e3579b3f63eeb92fe52b94cd69f49e16b660742f0170 +size 3140644 diff --git a/eval-results/gsm8k/5/ckpt_084/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90582f4e6a835a5cfcd88f09102093f52dc277b1 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c83d0c1fc93a02363729cd3d477de909e6cda542eb00625cb685d08c4ce0b440 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_087/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_087/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ec1177a2e17de4a0bebd9bf7e945d0ad41d73b8 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_087/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a24e36e0dc9d657c81df8635494bd3ec39f7fcb0f7a5b106e45322543341b5c +size 3124915 diff --git a/eval-results/gsm8k/5/ckpt_087/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b84d1fccaf1a79f9747f18d872b6bbfb24462622 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ca36e6db78ba4bf36a1d1e321c34760e0d5e89e5dcca4fcef65164d52fe93c +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_090/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_090/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54c18612c1347797132ba20e386e57afbabc816b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_090/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d19dd5793fb09f06baca10a899c0cdcbe49bfe455ff8762edbeec176ab0b53a +size 3143812 diff --git a/eval-results/gsm8k/5/ckpt_090/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9394af03b5f106d4571b6910b7f181a2e172de5 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4fad5d8da8d3853028b75f310b8d5698f136e22c047459d29b0e989144dc9c0 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_093/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_093/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28d6afaf036434ef136e7f667bf863b096faea9c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_093/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f187b1bffb27f3696fe3486a2af903d7a673f66e9b5adf219fc4e44981f70a1 +size 3137901 diff --git a/eval-results/gsm8k/5/ckpt_093/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..352d899a48f03004c17a5f467e6254cf885047b6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7140b7c5018c51a9791c41ef228f03b87f623f3424e1abb94cdd8754f0d47246 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_096/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_096/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4c6d04b744b844aba5f62756e1ad5d6083ba620 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_096/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4861fa2bc295d08e708ad88fdfd1c27505d262902ca4c34cf03252f670cccc +size 3131394 diff --git a/eval-results/gsm8k/5/ckpt_096/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..168ce1b5c7573875e3c61ef88820b4cb091c5db5 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40899be3772730ea7e6e73b3c20a0819ba5d9b0463068fa07a4bab435d45da7 +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_099/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_099/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..673e3feffa8d3f6e435e59515aa0fb611cb1695f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_099/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4f158215866f6566769fa98802286c939b32482dc6fccf9fea0bd88c1ef65f +size 3132482 diff --git a/eval-results/gsm8k/5/ckpt_099/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..224b7fb5aad510e73afd405795c57af2f042eccf --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3c67ee6d15549b0327600ec0a72ead3db2c1122f83e65dea6c75d2b6a8ed1c3 +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_102/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_102/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82feebe562280e8556f007f96f731d3a343bce38 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_102/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ff2a29bdb8e4c47dd2c8ed81cb453eaf5aeaee1966add23d3b07dea98cb490d +size 3157862 diff --git a/eval-results/gsm8k/5/ckpt_102/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..126efe73b7204b3bb220439b167513805b67f754 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952457391e4be9e1a7a707c46cf3215999ea81ca762764b294c6f13ea967062c +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_105/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_105/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62eafe096bbef189e92c6ba3a8f7540b969459a7 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_105/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7992fd13bf5200c9770e99647f9ef6f92fea26d652f8cb2f6eb4549669ae764d +size 3142264 diff --git a/eval-results/gsm8k/5/ckpt_105/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17d389dd711a15e5fbfb97ccedb1561a3ea8fe77 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61921339d7ebf5c789c73fdd50d4f82c0c15a354b7e5d0b65cd5e499b9c04d1 +size 2923 diff --git a/eval-results/gsm8k/5/ckpt_108/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_108/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb3bc162800a5a3f8d968d37a066423212655fde --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_108/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5899e587e5ffa9f62e110482dc37472d4ff95722a8667eecc2775250e8fc7b31 +size 3139210 diff --git a/eval-results/gsm8k/5/ckpt_108/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6591740210deef8f281a970430cffe58ce129f66 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f676b5f866c205d625ee37aea3a47fa63377b0338577a744c84456432d0b9fb8 +size 2954 diff --git a/eval-results/gsm8k/5/ckpt_111/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_111/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..577f2567fdb8cbc022066fa43552bfb1f80d96ed --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_111/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82ee2455d6f89a1cfe352183d0e5f149ab7cac02e4fc1cc55484e7feb2df766 +size 3142509 diff --git a/eval-results/gsm8k/5/ckpt_111/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4790881b95e1e8cd5b89e8f43091797e9abb9775 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a955169065f5ecf4d69ba4e09a93b20045f3b1bfdf5215e16517354d2be074 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_114/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_114/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..072336e60a9d3a21c6d64ed8e0c7f60751587897 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_114/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7576d0b09c820b7e1a68442d26b1c91d78135c2ac30bd2861a53dabb5b011fca +size 3130199 diff --git a/eval-results/gsm8k/5/ckpt_114/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a766364d79c036e916be49add8b8dd56b4f9e8d --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639a971083569c4adc3a694e9574ad6901e213403866b1b215bc701c50765826 +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_117/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_117/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bee86c8351bb88a6109edea1c41e34ce2d507696 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_117/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73bed9a3270f70bedddaa1766b1cea470ddf6334b4f6f5d07c12988612b35a6 +size 3141724 diff --git a/eval-results/gsm8k/5/ckpt_117/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8406fb8459abae2018c60b473f84b0636611de8a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ecf18c2576aab04e524f2a04241b929cc3d86fe018d6ab49db735140d49f24e +size 2951 diff --git a/eval-results/gsm8k/5/ckpt_120/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_120/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c2231992d4614ffc952676def4b7fa347aa0b10 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_120/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56fc2f93d0d9f086b8e232addf20ca2541a56188d1263201f5b3bdf96d2b0e8b +size 3140625 diff --git a/eval-results/gsm8k/5/ckpt_120/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..690a7879f77a23d6fab6c0f1026b58c38e85da23 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa42cb2df211fa6192b5c6f0e62502866528cd1232d4d75e75999d55d301a144 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_123/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_123/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de6fb190a994a518a659e216b723014907811813 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_123/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b6d0535cf426c64139b087c7d731fab96248e9cff1699af6db1d84f1ecfa56 +size 3131442 diff --git a/eval-results/gsm8k/5/ckpt_123/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6b93dc3412525162b6c7dde839d545c64ea6374 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec0ed4269a38b40e341adbb33842c461e25ff95bb8a821df619faa4e1513ed8 +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_126/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_126/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42b8106c3de98935d546d36587e1aebe3f9e2d52 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_126/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66ef6413f2c7d731b04944b4eb46249dd9cfa77b69a2f4f060c994bb4f43c3a1 +size 3136324 diff --git a/eval-results/gsm8k/5/ckpt_126/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e6af8f1f4b69905346978087781af2f6723e617 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b87662908127d4348ac9e278fcb924919cb09244363dd39c06f260272bf0dbb +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_129/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_129/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5f9f67e08cf55fe79f0d78d7beb194a448d921e --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_129/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67d1adbf681c334ab29900e1a5894bbb15807f0ab2b79d0e9f75f69b71a6aa7 +size 3138937 diff --git a/eval-results/gsm8k/5/ckpt_129/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cab7e407d454eac3cdfb68568b71d52e9dc604b2 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91da263aede6ce01b0545aa14d5b443615c05ce3ac829383e4ed371331b76cbe +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_132/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_132/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd6e0ad46dee885159e70c8362860746540d03c3 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_132/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923741b4b8019c71b8b1b6ee2d3a3d33ee9d00216b606c6e9dfc63f0777bf0df +size 3132473 diff --git a/eval-results/gsm8k/5/ckpt_132/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9775bdf9991e5c120fd53b22146c81da7a7c7a7b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68272e53d9a05beed7a6e950c2523db9cdae0a251d0b7d88cfd649de83d1b0f1 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_135/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_135/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d0497bbdd6c3015ea52d4da0746b9779e889bca --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_135/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4fe163895077af2867724437aec79401a78c53d5e95c17a5f534cbf7b95c4f +size 3139545 diff --git a/eval-results/gsm8k/5/ckpt_135/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84e1b6f73a6eb9101bcd81c3730ad120ef314fce --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd3db9afb2d892fb9640254ff5022bc9072da420f302372137475e2978f7d00 +size 2917 diff --git a/eval-results/gsm8k/5/ckpt_138/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_138/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57599744c634b8eaee1c658564e7b686e3967c3a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_138/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b20f2b157cd81b1f2faeb0dea528b444c69c74f107e9f6408d033cf2ac490d13 +size 3149045 diff --git a/eval-results/gsm8k/5/ckpt_138/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5655ce46c7d123313528fe53b151f769d33aa2a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb5de4a1ed290b866d72cf8abe20cc4d7defb4268576e9954196dbdafd931bb +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_141/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_141/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f60ae9a4a35d0d7bc7a0ea722dd8e6c11fad75e --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_141/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80cc81d543dd7fa2a54a30c5c876841e12df8c15dc5ccb3375005093af719500 +size 3144735 diff --git a/eval-results/gsm8k/5/ckpt_141/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..def9a37e8cb10ca08f438700843635faaf5f543c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b4c11c6cfc1d3cde13a6f89a29cae72f08dc659ab61dca25955d8911acb621 +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_144/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_144/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc8c9738a8910902f7374e4618e64315c8f8d659 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_144/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f4198dea44cecf168e58cbad253a9e2ff48c99a3c3f2b188ddca3863d3c5c1 +size 3135496 diff --git a/eval-results/gsm8k/5/ckpt_144/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83e4e24386e1e4bef8acbcb467312e7b124e7911 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9b6ff90e2c6bc5dce0d396cd5f2600b4109238b679be1f8e5c8f1f9e9adf9c +size 2918 diff --git a/eval-results/gsm8k/5/ckpt_147/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_147/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d08015bccda91e8acbaccc179db7babf72a3bf7 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_147/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d0c884e0b9a5c43c5fa329c1ab12c66a454b27595359bb4da668dbca219ffd8 +size 3143310 diff --git a/eval-results/gsm8k/5/ckpt_147/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00f1584686728aa054d147b7777d285eb714c653 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a514f965914b6fd61834e3fb25b3412260bbf21a5ee56a05dc17c54dd29fe340 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_150/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_150/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd66af4af75152abab497733d4eceb1967108204 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_150/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be05b7d8899506bfa45ebc329f0fe51c180a7bd28f69150bb8a83e080d27b5f +size 3143793 diff --git a/eval-results/gsm8k/5/ckpt_150/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f12c4a638f022256cb210646863f3702dffde873 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a936e9d1882e5da3a08e4505f60e4ccd6b7aa00ce6ce6106b8e2ca74c75aca +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_153/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_153/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..950cbe1b934af4ca92d892a6487e5740bf4f9349 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_153/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5c003df0a43ae80d2105b9fdaa0926bb92aa6f7ef8684e1c0a3f20d58f3d15 +size 3142694 diff --git a/eval-results/gsm8k/5/ckpt_153/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e4b71a194dfc5f5bd058b98a66b51a394d2b550 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b4d1f9e5ab55dabc41545eea6aca2da059aa7a971e6b1d8d86e46b6b0d85522 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_156/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_156/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b63386cc7b774e7b7fc44caaf97c20e7d9cc42a5 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_156/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a41786e9329d6ae9b981e7bffa0281f781f77a90558a7d675f3cbb91dfb241ec +size 3149146 diff --git a/eval-results/gsm8k/5/ckpt_156/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..388be235a458db3c6d9a70dce74573999d4b0912 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8095771aa7d48fc4ce0ee72e2b5b78c2ed3f5876cf8a8f7a1b419761d56026a +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_159/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_159/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d2f98ff14ee02cc9945c677aa6cbf390adf27cd --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_159/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5149e6e90adf65c72e5662ebdd1e3745c558b6e370567acfb5b19dae5142cdf +size 3137784 diff --git a/eval-results/gsm8k/5/ckpt_159/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad6297690da2d2856204d3248b561447f44b23e6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd2f1626e809b5daf91fc66f539e9417655fdade29b239407563a8cfd338f13 +size 2918 diff --git a/eval-results/gsm8k/5/ckpt_162/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_162/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7645f160922108e3a8fc4e52a539c5d2a7165f64 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_162/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55830465a66aad8ec2e7cb1c69ebb2d23c3f22d4d17c7166eb29244c4abb35fc +size 3147547 diff --git a/eval-results/gsm8k/5/ckpt_162/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd704b2e1a1c8687dde4a8f016b5e27bc743f78c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15670ab919f9602b04cee98aec836c5cdd4e1f902e02ffd57f6862522c59ff53 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_165/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_165/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bca000c3d9367b0410efe680cb132a387700adf4 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_165/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8864467c61733573ff6808a4cae53f363aefa37bdfcbe5e07c52516644a2919 +size 3141088 diff --git a/eval-results/gsm8k/5/ckpt_165/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68ea93b8f3ec768964f6399552d236da34ae639e --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa707a15b1bdd7a6f35f22e2fd2c8afac531830844bc655282178685f818c2ed +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_168/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_168/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0897d4a1b75b1b107e3d7215a110423b86b30c72 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_168/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dadf4bd391a56f6f535be238ad09827cae47b0305e6f4a062110b72b8c033c3 +size 3132822 diff --git a/eval-results/gsm8k/5/ckpt_168/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f6655b0d56c662d446061d9b2693c03cc2a020f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45bcd7f0c9a698a2e04d2ca6c0ecc80544d48194f4cf0c54a7437bc50e4b0c60 +size 2916 diff --git a/eval-results/gsm8k/5/ckpt_171/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_171/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00dd049fdc0049c257b5a1bbe93aa5a4426d3482 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_171/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa55ec92edad79fa713f1d2b5530886b2c5fd20776ceb8c4705690a88b9bbbc +size 3138344 diff --git a/eval-results/gsm8k/5/ckpt_171/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96b6e4cce4d479178f7eac8edb703b183705a576 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c376cd44a1d3ee450c3e5a6fc67921a9df115933d1f2d4aed7a066dc6f710499 +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_174/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_174/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b035422321ffeb5aa62c7e7ae48cdfb05b1fe1ff --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_174/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fba9f8cd58be10adbdac3e72a9dd8fbf02647a22ad30c97f818a04e8969f6b0 +size 3134269 diff --git a/eval-results/gsm8k/5/ckpt_174/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ecfd06f10eb4f70e57a9fb7e5b283134092f10f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1866e03ec4bf7f65e0bf5c2566f04c66a013e8777bdecae50bfaca70b98de76 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_177/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_177/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d3a6536dd3f4a049662085b36d5306f5ef162ad --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_177/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a53b1705bcd1c9a8db34d66fec606d061b2be6902df9020557bfc063b2d33a6 +size 3136411 diff --git a/eval-results/gsm8k/5/ckpt_177/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89256df70df0cea1e554de7b08647b12e89c7b38 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b9b251474554cedbaaba114fe08a742cf13e03b0b6e321e758a7e382df4280 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_180/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_180/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b35b39395025a5898aaf84c06d1b838f3171c102 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_180/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40610b94c59b2feefb2767f9e4f39ddb6599f6ab18054f6ff51d79bbacbff643 +size 3131441 diff --git a/eval-results/gsm8k/5/ckpt_180/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c931bddb7b0a90700d0ef6218daea86a4efbe60a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddaaed32d4b9a86d764d7d0fe5c68bc6dd6cdcedce95b2f67a363bcf14da82ee +size 2916 diff --git a/eval-results/gsm8k/5/ckpt_183/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_183/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..892e73b41183450748df80639ebb0acb06454132 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_183/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99fd46ddad6bffa4c49ce1ef48e1db503c9b90d8228b432ae9ea0faedd84a7bf +size 3141304 diff --git a/eval-results/gsm8k/5/ckpt_183/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62755b146102f5b0a8de1bad582c5e43c9775889 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee2ad3e2a332c894659b9ec17e7da36abc167961ee7ec15c2ea165b0b0bee5f2 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_186/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_186/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dce71822ce3212135628935d8d3b9cc680909636 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_186/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6a7f5055ca2595a91c1ede367484f6b54051b4b71865f8cd5dfc9c70ee605e0 +size 3137588 diff --git a/eval-results/gsm8k/5/ckpt_186/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28f5d335e8e247313baa01a3f2bd9d48d171b05b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b703779f41eafbe70ce32c836e90504cbc718da4f5cf22baa91b991cc778a7 +size 2916 diff --git a/eval-results/gsm8k/5/ckpt_189/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_189/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb6764d55b7fbee236f56dfb4f2de9c275320b9f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_189/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff26675ec85f2a336db3cf3a22d73a53df2bb6871d2d2197b037ad295d0be92a +size 3140524 diff --git a/eval-results/gsm8k/5/ckpt_189/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adce793c9043b25b9c6e50c3a791c185488a667a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bfd25f55296f01f62a91b9d300106f7c35a90704bf214861c9d6d1e5dbbe6b3 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_192/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_192/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3f0b2d1a5b06087b5c0b75a3eea87a38c525fff --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_192/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3591c2edb742aa2dab13b745158e44be6257a6f26519559780231504c169ca4 +size 3135152 diff --git a/eval-results/gsm8k/5/ckpt_192/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2aa6cc36af5dbb124666557e8d3432d7e206f843 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb639d2852a0f3f002446acb8b17a531c322eb8d45fa7cbca10682384e163383 +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_195/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_195/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21ee77b9ffeb1f5b4621f3aa82368203a1365aeb --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_195/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7d69b52b527a27287f6d57f2d6aabe904f71757ff18823638cf4dbdd1d967cf +size 3130115 diff --git a/eval-results/gsm8k/5/ckpt_195/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab09999549fc0afbc27bded93dc77dab3ad25926 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2964baa762b62043dc29ff8c7c9039f87eb22178b311a7e63086bda77d3052 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_198/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_198/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..639be211f11595b7a423dbd8e0a0d491e3514db5 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_198/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c3994e6f4148b5caf8780249f0c051d30d7ee9eea76d5afdf0f8f40a46f9028 +size 3132911 diff --git a/eval-results/gsm8k/5/ckpt_198/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ca68ed10fb87cb6c3b8e25c2f4b797aaef95a5d --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50442547a080c3dded2886bfbbd3b01c3e93f7c2b1f67e97c7a1ff9b90c21ee8 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_201/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_201/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f1332268e07b1530dcfc4ea3da365247202b6dc --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_201/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea969b81f096248896cb3fe24449d452dbeb137650f6b88ff4556bfd53b726a9 +size 3133315 diff --git a/eval-results/gsm8k/5/ckpt_201/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..828fbb0570de74678e76df5ce515ae1cb359c9be --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95bd8a25cea0e2fc0693f6564ff4fe287210e14adc419a7cfceff67a07fbdc49 +size 2918 diff --git a/eval-results/gsm8k/5/ckpt_204/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_204/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7abbabb9c4dc9180f0a98eec3a28ca261396c1bd --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_204/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38fc464f16ad791b730f5077e0a240d657dbcc66ad056fa3ae6283d04557aa4 +size 3126912 diff --git a/eval-results/gsm8k/5/ckpt_204/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..489fc136d9f1318d222231cfe409977301a85e26 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b3b0357ec10087ca622ee8f031257c2573ee8b4342ba2b6f278665201a212f0 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_207/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_207/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62b94e137a9aa9d10a05486ee9f186c786d18849 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_207/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a904ee624e6b2b519fd1ec5b11e80153e5bdd2b323f748939d38aec111b1ae +size 3124978 diff --git a/eval-results/gsm8k/5/ckpt_207/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45d5e3e208223210195c8e2133a2fd2e8d6bb05d --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6fb18314206bb3636f652f30236309fbd6d5475085e7ef5cf5495ee547e0a8a +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_210/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_210/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..491c8c3c8abadd76979f3d8ef25e09dbaeee34df --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_210/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34e0019341bbc2f7ff1d4046df3cd38ca862e76f679dcc1fefd4a01a7712032e +size 3145222 diff --git a/eval-results/gsm8k/5/ckpt_210/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67681fd96df7b5feb87f82d93c7fac54ecbe675f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4899f771a02ac821d79acfb4d6d7ec4bed1a8acdcdc31a67cd68445ff6016e +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_213/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_213/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44ff95ed716e328de9f16f51395d766637b5e26a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_213/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddbe244da7020b774a909ba4a79553367eb8a546f9df9a6a921fa39a05bf7ff7 +size 3138393 diff --git a/eval-results/gsm8k/5/ckpt_213/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8a336840e8d5fb11455140497e33fccd0f8ac79 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f951c194991c6be0f1fc999040db0eb00bc3ea5b32726214ebed1af2776e42e2 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_216/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_216/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62e06e6584422b7df57a2190472c4508fb674ba4 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_216/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8988fb37e411be80dc047f1164243897e6a38a3dbd0d96e2a1219999f56967b4 +size 3137088 diff --git a/eval-results/gsm8k/5/ckpt_216/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbbbd17453aec1f9da15c3d8a925b16d2a52a96f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e140ee5dce2eea28af14aa357fbf64868b6f7b2fb332aaabc17142387a4c24 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_219/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_219/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31ff5578a5946ebf0406010b47d79b4faa9fac40 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_219/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14de2fc6510ea4471fe0c5114c6cacd04f6b2475a660d29bac918c39dc4b1fd9 +size 3135765 diff --git a/eval-results/gsm8k/5/ckpt_219/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee43a297ba795983b253e11b8ce1c72fbaa23df3 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a5d2b1070ccece94dbf9baaa047d3cd8ec98159c54e660bc42d91a44978dfa2 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_222/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_222/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eccb6ce48d3fcd5e266504af6e88a31176e79ab3 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_222/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee300368b628701443916b29ae0c363314514eaf0d3fb444032d5b32e7ad43d5 +size 3146249 diff --git a/eval-results/gsm8k/5/ckpt_222/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..553b009f604e1faab01df3893381166ff3427d0e --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e307435aefdf39c4da7c353dd9351789447aa07800ec989f45bdbdcf6674268e +size 2925 diff --git a/eval-results/gsm8k/5/ckpt_225/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_225/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5568e307935f93b5e886c6b14fa94bc5da4d5527 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_225/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70f21e659cfc21937b45d335544a7c83d7d04ff8cff99d42a93a8d33c356d05 +size 3137679 diff --git a/eval-results/gsm8k/5/ckpt_225/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cff89d0caf1afadde72aab64c6bad6b7edf8c210 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d225e03adfac7dd20c5503b1769e1e2fcccf92052f59dcff36d2dc04289cc166 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_228/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_228/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6325a45022da0fc115d1800e218f8966eecb383e --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_228/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:693a3d805df51985e1823cf3310564c2f39a241172f60ff3ee4888b94b26ad46 +size 3143180 diff --git a/eval-results/gsm8k/5/ckpt_228/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f79d6f98c31908adaf88f98b53b3dab3a7eac98b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e63dbbd2f5199b53b39cd3f0c2711b4e86b7314d89e3ac3fb3460250458ae7 +size 2918 diff --git a/eval-results/gsm8k/5/ckpt_231/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_231/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f51306257c1b5660b034ed9f3118b56835aa8e8 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_231/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42abb354d04121ad2b0382f6269daa14cf438b92b754ca08575df4c5db34e26 +size 3140751 diff --git a/eval-results/gsm8k/5/ckpt_231/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f3372a3c7731105c21ae356d56b8bae21ceaffb --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9e81f2dc5a6b3ef961b690368f4a5d41010a322a422afb5f5bcd1411a62086 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_234/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_234/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..722e2f2696f5867dc6ab6eb08bb266444ad769e2 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_234/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e623021dd1815b56b10213888f69266476c932d907663d22aa734fde17ebd5 +size 3140574 diff --git a/eval-results/gsm8k/5/ckpt_234/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3637ad1e56b123abc76fcae174fd9578248f09c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a0ed4a94e4e3678b73d4c4fe759cecfd72b64dab9b60b0f65cd720ac194bea +size 2894 diff --git a/eval-results/gsm8k/5/ckpt_237/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_237/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c59dae3246050580b0b7715e64a32d571dd5e2d --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_237/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f7ba510085884f2b5910dfa3972769074259ef5128167af901087a240326f7e +size 3140742 diff --git a/eval-results/gsm8k/5/ckpt_237/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18abaeafff0a2f66d82129e95583174dc09d33e3 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea6ea019fc04d9d435712a0686d6c93b883db8de81272f051a9d3602b51b795 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_240/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_240/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a9c3df5419daf21132e9749d489059cf6db4e95 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_240/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abfd76b93016dbb230dd001017deef6703ba816ef839683eda2912e9e4260f72 +size 3138992 diff --git a/eval-results/gsm8k/5/ckpt_240/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81cdf49466e99b9ff0be5cb5de2730c3aebe809c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:405280c3cdcb89a379e28053bf6920b85879d15cbe8ecf4bd0f8238f882b0842 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_243/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_243/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12cfb9c22bfb81d01fd48b0f716a9bd17ef4719f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_243/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5282a9e2fa0c10e0e04a40be7328f5925211e8fb291009a5e9ddf91c1579ddb2 +size 3142156 diff --git a/eval-results/gsm8k/5/ckpt_243/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38c278c72fd2ac7a92ac2fae79e80cc35c8c9bac --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1341fde8d3508f03351d6b0dbfc1e41391fe93a30121484281cf14a0f300436d +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_246/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_246/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1626b0164ba4751951b37294c81f2370391e898 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_246/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76369ee457e6b4835ec925932d3036d2b9110c1063e5263b20c3726415465686 +size 3143400 diff --git a/eval-results/gsm8k/5/ckpt_246/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..169e3fffdbe38c9ca11eb8ee1775a6423ab47b57 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411e3ae7b4be7d429e10bbffc0469cf225d6a6e57574881c856bf5f9233975fb +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_249/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_249/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3407f5ba7fa61a85d0189f6ca54f8fb4540c718a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_249/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dac8b15ee44ea2cfa69522514423a436e90811cae5a23009e84947a00420a6a7 +size 3134705 diff --git a/eval-results/gsm8k/5/ckpt_249/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25eb2a3fff14dcc91743f173a71ec098fea23995 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1a6bdc73a02bbde333e37159e2cb5f2363157cd3bebc24fef7b34cb698a221 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_252/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_252/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..149749975ae7a2d2f62324aa3850c9951ab7aa7c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_252/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3729d9030a5f3638112dacaca389f3f082b43743155a49bd3ee52103cd8f1000 +size 3137688 diff --git a/eval-results/gsm8k/5/ckpt_252/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97f4e0c97c8b39646372c348d3db4dbeab78c863 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35b66c9d91c6f6503fa2b89ee7bce35f499bae12e4d83a79fbc2d07e536a196 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_255/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_255/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60e4db2804114855bc39f91cacba103ef258c0d7 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_255/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7bd3cf7cf9b7d9f4c71d828c17340a195cf1e83bbc0df1690662f3a20d182c8 +size 3141546 diff --git a/eval-results/gsm8k/5/ckpt_255/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e54fb725f80d106ebab18e38e3f2d013c028a822 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b489eae6daae2b16e0c7c0ab99bdc1d052b800f7004e8686334accea2068d992 +size 2916 diff --git a/eval-results/gsm8k/5/ckpt_258/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_258/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b583cc8abf1391ec3939c4358fec5c251bd8d5a6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_258/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8828c7bb063f2f469069db3fc8e483dfce48ea8a8d4a8547febb62e48bdc4d8a +size 3144220 diff --git a/eval-results/gsm8k/5/ckpt_258/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..024efafcdb48ff327b3a976b02fc6540e383901c --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6b3f44b34ae19dd277b0450cf850224d1ad384f7448224f75c6a09db407f62 +size 2918 diff --git a/eval-results/gsm8k/5/ckpt_261/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_261/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f4f364909fc6e705a2d94d27e9fcd551d9e0b5a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_261/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8a714f8a805693c7d18a23f9aa95ec6ee9183b13c9a26abec89b912771bb1c +size 3133364 diff --git a/eval-results/gsm8k/5/ckpt_261/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83985f8bbf58b9d707a1d54c815131c99c1632b1 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dade8ebfe33ee5e9ea0b774f378450b54f7cac95ebbe06fc1396c8d8a134494e +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_264/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_264/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..090164d8256a4d4fa893df3ba3ba025735db65f9 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_264/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9c407b491bdb87c84b9285a286abcf079d1d49107209d0a6f077a5cdf162510 +size 3139311 diff --git a/eval-results/gsm8k/5/ckpt_264/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50997d687c452c3ad55e1311efba08931bdb3de0 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2514fa193a52a08b202e220c84265d2e8bed1c028907a98edd94df345896fea6 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_267/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_267/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdff89a39c04e4c1b3742c113c031bba69620370 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_267/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d42d118f28c31eb54595fb4cd3682f0fe5bbf702ab858a88815ad99de717fd +size 3140949 diff --git a/eval-results/gsm8k/5/ckpt_267/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6be898e16e4b3f92682dc2372310639afc029308 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b97dbddb4c6b9dda00f442d324178da49547a2aaea7c010271cc1b147569ce +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_270/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_270/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4540f39379a4a6e1981e6de726e52714a7198849 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_270/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b52e598d9841dfbc33d373f9c769ca6a53be0122e26cd5be8cc5ef7b70f558 +size 3141870 diff --git a/eval-results/gsm8k/5/ckpt_270/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a640cb459cb22abb98a709e6d2c01be2702e6024 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e07c13579edba442279e0005f7677ab44628dfd2f2ef7835d85ede7fca13fc +size 2923 diff --git a/eval-results/gsm8k/5/ckpt_273/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_273/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..848bdb89a14bc3c058251a864dbed988d21491b3 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_273/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eebfca986c4000c9360d02fae8bfdeb1a53a9da548a0d21874f5c295d13eacb6 +size 3145282 diff --git a/eval-results/gsm8k/5/ckpt_273/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa24c1b6f83e1b60e6f61d9f325961267d2e90ae --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa95fe7e4ce6fc324e35c82083277d7ca15145648a38300ef5e77261e491925 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_276/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_276/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e875afca97c6e20a2964d7e8eca5c82c7697543d --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_276/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0389894bde99e2e46bedef74953633690744473e49321d6e4c35d87e871dc961 +size 3147789 diff --git a/eval-results/gsm8k/5/ckpt_276/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33ebc95804407b6ebfa7dba1a9cda13be9b31ab8 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6d2b492d8ef546bbaecb906e47a7b8215947325b3347b8d75da335e7188e6e +size 2892 diff --git a/eval-results/gsm8k/5/ckpt_279/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_279/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23f9f399b2560ffb87c1f35f501ffe49a0cb4fc4 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_279/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80051741642c692d4a4214475618676eb5466e34fb7f4c28ea3ff08899867dc3 +size 3139790 diff --git a/eval-results/gsm8k/5/ckpt_279/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5eb5917fde618110cf3c9ed200f67c27f0ad229b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3c19c652994bb43aed007f877f163664857f66bc6c34fab5c955307422132f6 +size 2918 diff --git a/eval-results/gsm8k/5/ckpt_282/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_282/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..035fbe201332beac44063f2e70c32cef1b8c27cb --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_282/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82e1fe5a20d864e33e1f4fe4c5b67c603925b4192c183c88d090974994383ea +size 3146664 diff --git a/eval-results/gsm8k/5/ckpt_282/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..897fadcd009e30f96d1e6bb5d4aae04518803a3a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51fa30bc1d6aa152042503f3b53aa3579a065c3b658db69915307c0474372e5 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_285/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_285/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7b1bd5d27baf19aeb651ee64f1c60a09d670d1b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_285/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aae3b449ad0eec70fac292c9a087d6465a365c157c70382b71e3e5695b5f44b +size 3140685 diff --git a/eval-results/gsm8k/5/ckpt_285/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bba026c0d118e5145c2bc0afb4e9fea5a1a5377 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe6bfce4337aa2c756210a1f590bf9060c44e846b20529ced0b99bf839089e8 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_288/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_288/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c17d4f10cfae7dd757ee0f530a0922097a35f6d --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_288/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe503aaa2af2e45f1d55b442b24f1c463ad9e5974fd4dc81b9e9d8809f161c38 +size 3140992 diff --git a/eval-results/gsm8k/5/ckpt_288/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2be012e5458fe8eb211d4ee1ab5fa4eaca668727 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d876af98f9223df0771655665b78cf683ba9525717100c0fed89ddd4c664ddd +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_291/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_291/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4e5f6247cd52a64bfbbc5d1fd65c1f0a3915169 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_291/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7871bc5afb1598d23169109697105cc931f14f7bc96708448f6990cbf9b30b6b +size 3140642 diff --git a/eval-results/gsm8k/5/ckpt_291/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b18eaefaaebc8f6ba5df330ef6041ca1326708ae --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7a1099275075cf3c473b33176bf728ce41019dd03fd1d9014515c8bec7ce94 +size 2917 diff --git a/eval-results/gsm8k/5/ckpt_294/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_294/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a6d2d9c837e0f620cc8de801144cf31ddb355fc --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_294/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0afc767ad5fab113583faee174c80527f2feb795a0946184e25995b053499ad3 +size 3133378 diff --git a/eval-results/gsm8k/5/ckpt_294/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d9d738a4f531eff73542f57c6efc058e691e81d --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1469ffe4cc98ab1f6818bb9bb32184fa76b74930d5f92c1983c4446d296d4de +size 2917 diff --git a/eval-results/gsm8k/5/ckpt_297/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_297/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1b2edc7523e691a1f6972e8e1ef4c4428017742 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_297/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76a9aa91afafec4d9f9943a4efea3137f39a37d1202295b373e399f1dbb7166a +size 3139528 diff --git a/eval-results/gsm8k/5/ckpt_297/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a7174628f2899a3c4e5f588ae82d12451ad6570 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd14cb4e1bcc9d4ae3c290bbe6296e1b37ad1ae4c2befedb39e3a63711cbaf4a +size 2923 diff --git a/eval-results/gsm8k/5/ckpt_300/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_300/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c3d8d6b63c1cdce1267ddb619b6927a3f4e7db4 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_300/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53864619f9794d4e2b4f0b607387a18ddc0a75d8c564db0186188201cbebb42d +size 3143304 diff --git a/eval-results/gsm8k/5/ckpt_300/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbe0deee64b7a9f9cd595b08e9f435ad3c3bff53 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4644703b17f103eb96cf5d25d6c86f162e7ce9334b7a624207985ac149094e +size 2917 diff --git a/eval-results/gsm8k/5/ckpt_303/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_303/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3b1b6d8857063614bdd7dfa7764a24c52ee8d31 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_303/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83196e22235adf6301b0ef68bfe153437a47373a2f5b9b85fea116f23376059a +size 3137195 diff --git a/eval-results/gsm8k/5/ckpt_303/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d2a2f28c38e56e1be0479177c577ee5b7ca7b8f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6bdb07b78206f52dd8d68e13038c87f01234e2b234e993f0776bb1ec050381 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_306/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_306/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..442576ffc5e9ea31dfbaaac75c102ab69a28018a --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_306/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57493be6ea4d5b176cbb5d154c008aaca21e094b384b07081eaa1cd01f9782df +size 3136497 diff --git a/eval-results/gsm8k/5/ckpt_306/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d20279b7e6615c6d6f102e7f0fb5d7bbd964e801 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d1c261c1d43424eff4749cef4623beef9f73ebf64cf9300624a9e1c46aa969 +size 2919 diff --git a/eval-results/gsm8k/5/ckpt_309/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_309/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe7d2a691577d2edd37e36df303d93a87519c308 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_309/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70a0345fae9a080f340cd976679a0164a22ac73a377adb05ea32cf516b769a31 +size 3137406 diff --git a/eval-results/gsm8k/5/ckpt_309/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25e07f45da64087cdcdcfe4befd169716655bbf0 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b68786088795a129f0ec98608dc4fa4f000a57a16b6681f204fb258aba4a72b +size 2915 diff --git a/eval-results/gsm8k/5/ckpt_312/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_312/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..daf8dab658ee36bd9f7ce246c561f68138ec822e --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_312/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d2b7c59e3512acd9c45b2d311fbef0221467200a6f131673bb1f966cfce3c45 +size 3138466 diff --git a/eval-results/gsm8k/5/ckpt_312/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89e16a9c46b1b141fde9cbe113bf1e908abb4f86 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b1b2e5dc97d628233ac5714552d29b29894d8cc5e7343a007e32d2c9cfce762 +size 2918 diff --git a/eval-results/gsm8k/5/ckpt_315/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_315/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df752e2f66e28c804f002c83ea7ccfe61907cc80 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_315/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0be52000e36a7599ea598efc3dca25de3b105d99621169ca8069f5ffa2f06ed +size 3138982 diff --git a/eval-results/gsm8k/5/ckpt_315/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62d42e53ae0af28bcdb07b75d7be0cd000fa66d6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbf933e36701dcd97bee789f3cc9d5da6ac20cf6d00c96934b34aad2bcd0331e +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_318/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_318/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..407995992c5eb1860a8dc728d1a376c0b6787a91 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_318/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0761b0f9de88371d409be750e3ebf98d6344034c0eb28e58ba0c84d2f7aafd28 +size 3145625 diff --git a/eval-results/gsm8k/5/ckpt_318/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29a2a4427a88eb604b33b29958ddef4c2d335b14 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca63309e8275d0ae2be12e72cc57777e623294d36f0429b17f67790781ff0a38 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_321/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_321/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e38a7448b1307d11eddf50dc790bb21d99380bc --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_321/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ab53a14e1951ae8f7a3ae5df2eff5644ac9b62ddc718be0b08ef6e89fd2e6b +size 3146624 diff --git a/eval-results/gsm8k/5/ckpt_321/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4a41ffa4d3d0dfbc3cbef9f79189ce90ca6f5e6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88460221f09cc2061043b98c1384568f05dbe03e7550627a4c0d6adaa38fd06f +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_324/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_324/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..658d1cd91c8c2a81a51c9767bfbc6b4d3ade87fb --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_324/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:577438be37fa3acec3d627088c31594c302ee5737ade48123ea50e16e92d5293 +size 3139245 diff --git a/eval-results/gsm8k/5/ckpt_324/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4f231829dbbd0c93e07e437d8ffd0a27f925c9f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c70444583c6d5ee6fa667cd88e059c9bf037c32c40605fc858d2e32ba9333c0 +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_327/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_327/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef5fb3e7c8e6a2158c7627f39c837474601f2cb2 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_327/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9444782db680c19a071e987ba67bf1b5bd22bead2a816b7e1e352637ba9dc2bb +size 3144411 diff --git a/eval-results/gsm8k/5/ckpt_327/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3de6057e4d2d2eacbd8c7343e2e5f31b9c8277bd --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5dc8a867072db729421aa413c9921270d725141837d52750ba8b14e2637a704 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_330/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_330/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff4aacc680524cb30b05908570b5f6c28bf9a4d6 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_330/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc5a24ab8698f148d3730f3f3cd0af408da5611dae8d159ea390e6ad033cf7a +size 3143046 diff --git a/eval-results/gsm8k/5/ckpt_330/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e881fdef015fd457a1b9563add4120ca08af9c63 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c55b231fd4898f1f50d80a7e37332c86b40e3af8827b987dd14baae41718ad +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_333/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_333/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78ea635e060c00591a27b3606022862b6db0dba5 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_333/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e4e5dfb12968b36ff9c81e03f8ab65d1860925f08dcc7441413d7c9b76fcae +size 3134912 diff --git a/eval-results/gsm8k/5/ckpt_333/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3359f263ae72da389e72794bd1a8267153190852 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b70c6918e067c2e13c23d33a98713c54dc09da02db3ffaf0e531dc98ca288c +size 2893 diff --git a/eval-results/gsm8k/5/ckpt_336/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_336/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eeefebed4b064ac61bc09698a82d16e631f3e99f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_336/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7052f8f70b3fba0941b42337baa09da735cecc9c98559231930c36c37a73fa9 +size 3140489 diff --git a/eval-results/gsm8k/5/ckpt_336/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76694c1fe18e860ef97b0ac6690a542852f35226 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2cccc71c329a511f565a73d256ce7ddb623fac4d928e93de0d16d094fd0430c +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_339/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_339/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a5ec6a4b3d4c15b155684e393e2bda4f9d86e0f --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_339/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15c8ae41c7d56eec80868597c0777cdb711f5b9d0d7dbc0911bd1df87bb7085 +size 3136274 diff --git a/eval-results/gsm8k/5/ckpt_339/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..090c99cc2107fc56c2a63a9984f22fdd409e5752 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3788828ba9cbe55b09cd5817a7e4aa77ceaaa441fe12cd6f79898e94431023b7 +size 2917 diff --git a/eval-results/gsm8k/5/ckpt_342/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_342/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd65eb84c6eddbcc7f7c92e56e82aa4a01a94232 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_342/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4faf403334fa188bf2ad175dfd0b42a2a303643df2ff711d02298d2bd9f92d0b +size 3143581 diff --git a/eval-results/gsm8k/5/ckpt_342/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ca130f465d0112dafbda5761557ed8f2f2707df --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3805deba3736e94a7180d5adcdff0d76f488361dd14fa38ea98c6ef61ba74323 +size 2922 diff --git a/eval-results/gsm8k/5/ckpt_345/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_345/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da1fddb287e2ce1ace683d798179e6e30a42aa26 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_345/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d720d22b68eefad87be1cdccf12290fa1e568d59d83d1fe4d72edc7db0dfbb +size 3142386 diff --git a/eval-results/gsm8k/5/ckpt_345/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b26d93a90d3cd7ca70b1e0eb9196c94935ce7826 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b178a5fb6de2b0f5945a0e1d06d23d4c888f5e082e63df9ca3dce546fdda3a7 +size 2924 diff --git a/eval-results/gsm8k/5/ckpt_348/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_348/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cffbb99b89f147ff8550f70f46ac332d44f946d0 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_348/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d015ab0763d387cf390118fdcfe51cb39975624e30354ca3ce6480422de211d +size 3143815 diff --git a/eval-results/gsm8k/5/ckpt_348/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0db26c5df6b3fbb0bce705b067d00f9be1a7e24 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883b17e09a6b80ae86350986688afda59a3ea0fae6db72896fe11b1be3d0e15c +size 2923 diff --git a/eval-results/gsm8k/5/ckpt_351/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_351/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89c502dded5919e74a755c835bfe1c2508d237c8 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_351/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0c7609e5adae7f2c6a56d09fdaf0c79b5ef19733f5131a3b563278546ee8e5 +size 3137878 diff --git a/eval-results/gsm8k/5/ckpt_351/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..322e3d38d88ec4d840dbd89e62ec21ec339cce7e --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45b4f9802566c26e254541eb05c347cce62a935e99f3550327d9b3c0329b43b9 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_354/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_354/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aad24dc03e65927b44fbbd019510a4978efce1ea --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_354/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6bc873afd91107b11d1a1254f5398bc87d7b8efe861cbea47e837f0510e9f79 +size 3141600 diff --git a/eval-results/gsm8k/5/ckpt_354/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c50f454059f6b9dabf0fda8b7604bccf48b0196b --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1658c43c21f7525be23e61bec6c7ba0723d2bf24ec1be8d3914842534959f6 +size 2921 diff --git a/eval-results/gsm8k/5/ckpt_357/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_357/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..371641dc88f56c1e6946dc5fa36ea4da6cf8e150 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_357/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72730f79d07b13728ffa24caa584d29cc2bbde565d3b67f7eb7f537dc72e92a5 +size 3140113 diff --git a/eval-results/gsm8k/5/ckpt_357/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13d75919ad6d9d87ec77b406756c80728634c465 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d332d40adbee31f11f14b55fbad39256e839c03a68b300434407ef51674c02cf +size 2920 diff --git a/eval-results/gsm8k/5/ckpt_360/gsm8k.jsonl.tar.gz b/eval-results/gsm8k/5/ckpt_360/gsm8k.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..deb327ab65d938ee9bed1e9b812b9b3c9d769fe3 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_360/gsm8k.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcdaadbae72c453a70ddf9545511a428b302f79378687f077442ce8bce81b002 +size 3139761 diff --git a/eval-results/gsm8k/5/ckpt_360/results.json.tar.gz b/eval-results/gsm8k/5/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65272dfffd0b68c1556003e5388849345b711269 --- /dev/null +++ b/eval-results/gsm8k/5/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb71a19cb17d729e4b7d6a0efe2bc08981eeec5e8de5b59bc4c3f21980d8b16 +size 2916 diff --git a/eval-results/hellaswag/10/ckpt_003/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_003/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..148fb87d68cb9f744d2d2a719eb7a2d14d89d09a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_003/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b6794bb526f91d623f2fe2f3b14b363fc4477e405067718ffe330ba8f1c5ef +size 21182402 diff --git a/eval-results/hellaswag/10/ckpt_003/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..321d1b35433c7904d606525ac57681c860a77088 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f821faeaafd547a874127a4e3695335cef48f9024306ed2f1da21e79c8f98cef +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_006/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_006/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc113c6b703fe3d0f5a0d69b7eb25bd0be0387b9 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_006/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c4a6294b2d99f3a5265bdb19ab40907e86956ee109dc483518901e6f58788e +size 21186270 diff --git a/eval-results/hellaswag/10/ckpt_006/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06e85df1fdc8f3a029af8ddd64ffb334b33ff750 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9d8c3b9d51468f8bb7653492fb1248704f925c703d884b04e3ce94d117c14a0 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_009/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_009/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c25fab5f3567cc9de43a25f52a66bee811548ce5 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_009/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aaf181e688eb301cbaaadf5c453cf114188df468d11b62d3ccdfd99542972cd +size 21186437 diff --git a/eval-results/hellaswag/10/ckpt_009/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7624aa8dde54a294a27ddb6d79b3e6b4a184d53 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc061e983dc9fc7e82b49f0e7dc1122beedf3d5cd096f3ecd60b2536e553736 +size 2837 diff --git a/eval-results/hellaswag/10/ckpt_012/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_012/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffc93b61e7dede9a0ead1e41b062e792fa1390e0 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_012/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ea1586a3c8042456e887423c47dd1c8baec8e7651abc2000fcaba68ed0a6267 +size 21186061 diff --git a/eval-results/hellaswag/10/ckpt_012/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90c48cf72c5d4ed7810d35faeb51db45ff3e3ce2 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb14d3fba41a3a880efd719ac99dc00b6f4df790a2a441d1ddbad67ab5b406a +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_015/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_015/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3006aa32a96abb1cb3c0bb2ce6d01ca80b3baabe --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_015/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947822e695229b57d0f4750a4b5183b9e463cf0b8f2264355e6befb87852d370 +size 21185871 diff --git a/eval-results/hellaswag/10/ckpt_015/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37c52f9f7dff2e7cae24085de22780a4d44f0f6c --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ecdc2b199df15ec606f9299d0a13fdfdca98ef977cd937feafdf1a8bcd6611a +size 2869 diff --git a/eval-results/hellaswag/10/ckpt_018/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_018/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad2537542271cd31484d48ca93ba7d7d58df3042 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_018/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42b64c9cc7b76a30ff476f981333051544a264ee592cc7ee88a80865e0c54b08 +size 21185257 diff --git a/eval-results/hellaswag/10/ckpt_018/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f0a19f0c1adae79aea8ea03c23627bf92083ca9 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5caf387644c71a0c09c805fb1722f4dce57bc5718b23900f22f632addb81b5ff +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_021/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_021/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cf2fec7c3c4d9e3b2d7a8ba4652379149045418 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_021/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a97f99300277fb9f9d6df89f5c9a01b37aa0b17fbc21259be3b10e808f27518f +size 21184675 diff --git a/eval-results/hellaswag/10/ckpt_021/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ecdfe1ee3f34cd2e1c027d9f8d15a183e359d2d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4445fc83847bd61732d22679642e6b52d012fc81e8434d958bf222ab45acc835 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_024/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_024/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b6bcf7992727bf4e59ffe490b4a97e6ccad7bf2 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_024/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b063e4f102a0ea68ef8110fc870e37c379bd9f2f2438b5714c6d4b54804ed900 +size 21184437 diff --git a/eval-results/hellaswag/10/ckpt_024/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2908192bc2a87889498b1e2d3eaccc92b712ec73 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0b81bf7dc783593f1ff91b77d7664160a49a72b7b188d9d3280d662c0713273 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_027/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_027/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f1c9d976532f68fb86476c8f67e9bad0ff50039 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_027/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a155246229c1621c060f3e84ec835fdc55a103afd5e16668f7d2acc43d3855 +size 21183879 diff --git a/eval-results/hellaswag/10/ckpt_027/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cff384e437c50085d1147ae6609a98ee8b8d6732 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65df678fea6145abb7fa8b6b11586875785f208c6fa13a85f124480b445b1cc7 +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_030/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_030/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b825405780f418f0eb46e3ec248af68df7857d11 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_030/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee2227e7e06b7a595ddb1e2f49720da580899e2667bea08dc2eadeb6848ecd8 +size 21183678 diff --git a/eval-results/hellaswag/10/ckpt_030/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..141f3f45727bf44f6fa0c2d0a4756490b1caede0 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8df4de8c32f5c7044f6ce699c59a2d1342d098c7c9e6fb0f273598f09f7feed +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_033/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_033/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f9df707ba961ccc417cea73c6907373ab0a1eca --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_033/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18bcaef14418f58414575c75dc1ffbd2f9d37c3d729cd9df7f33ee047378f259 +size 21182966 diff --git a/eval-results/hellaswag/10/ckpt_033/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0315aaadc14ac3f5f92dc608ceb3f63a66d032bf --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e106a019a04f6d8240114fd2acb8b26a10069350636bdff01160fb9cf86009 +size 2836 diff --git a/eval-results/hellaswag/10/ckpt_036/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_036/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c5ca86b1a627f8c453f0653662b89f568f3eb4b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_036/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e87e8c06941f80bdcd60ebc3aab639dc8a4311604df873e3ef868ee2b43784c +size 21182822 diff --git a/eval-results/hellaswag/10/ckpt_036/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..848d9f39e6ac97e86a4c2a920815f5dbe4316441 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd2fcf7806571ee5ead707c1436e49352e4c2f1c3893460411f1e5a2d59ab4d +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_039/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_039/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..957ce05fc4a149510f8e9c6875b2fd40b99ce109 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_039/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e20ad46762b5351db3c5608527cb45ed5aa218e39f39c15097db587da2fae27 +size 21183077 diff --git a/eval-results/hellaswag/10/ckpt_039/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58085df33c50af18cb2f4f68106fd53982f09127 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c79e72ffc60d62c3aa447e47fb0c5648723b45879693f29d9337b8350b0aec53 +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_042/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_042/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d4851021aff25bbf920382ec12e978493888753 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_042/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7145962437adcb7d544ac59f82f53acc5a261396dd34f4acc54f59704ad355ff +size 21182540 diff --git a/eval-results/hellaswag/10/ckpt_042/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..117c24628f8ac45c02f81ea223f864540859ddce --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f82fa4493508609f663bcaf92af9dd2a5b04869daa1c510319d65fd7349c1d +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_045/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_045/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db5e6684526d4574f24d05e92d9dddb853ee8993 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_045/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fddab4a824b1b042cc95e5db53ed123a82e1e5649a2126d84d1c8e6d103ee0b +size 21182609 diff --git a/eval-results/hellaswag/10/ckpt_045/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d205fe5d96eaec4e45871739ba8e71453e32a201 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11663675f3c27c7613d7f5b0423d8b23d3e9aaa4a951a9e8d1149c8165662dcf +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_048/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_048/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68306703cb7362cc02a1584e16db5c3dbc213edf --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_048/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:963a76d967b47bde1a7f1a3d6d51ea5859334b8e7ebf723515828dbe9f5de80e +size 21182235 diff --git a/eval-results/hellaswag/10/ckpt_048/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a525aa360a6f49021447e3f8e3aff7bee18874f1 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e8eca650744f2760da433d43014493b352bb850d714d94b4c6a84017de9aea8 +size 2868 diff --git a/eval-results/hellaswag/10/ckpt_051/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_051/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bffdc31a804bbf0d84dcf620886a6b0dd1c9cd79 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_051/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d76100376bf9cb2e2c26a36eb40fb722814049460072c94c0e33b0563619d71d +size 21182078 diff --git a/eval-results/hellaswag/10/ckpt_051/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..726751fbd55130ff83c291b6faf49ad6e2a5d849 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3988e510964557b3148aa5915b218d0725a6500f8e326ca27e26b0cd1c24a9 +size 2837 diff --git a/eval-results/hellaswag/10/ckpt_054/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_054/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ca9e22700e55c74498c46d776439b7395b63a4d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_054/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be50c5f04d01d72f8ee102797c644b6803b46f131cdca11d7595ae80d01950f +size 21181953 diff --git a/eval-results/hellaswag/10/ckpt_054/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..962f206edaa8ce8c17796ccdc039c621aa2461da --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec97665e31f1178e8c7a5f2b249a41b05ac566c495b8f13357ca6b14359f7105 +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_057/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_057/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d09930ff5e1b207005537c8bd670c39ee6fe318e --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_057/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb7a102b7ce9245534d6810be0a93fe7c9e2035632623d3712a2726414fa4fc +size 21181513 diff --git a/eval-results/hellaswag/10/ckpt_057/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3a559551f0ccd72c514c516e9a06c133a754cae --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd4e9e79e1bcc1b46f760a387e6f7b401f7ea7f756afc9abecf82d9c6a845e3 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_060/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_060/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7f24022f43510c22ceeee3298abda8f27ead8f9 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_060/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791a599a1ce5d942732383c6e5dc0e378d1c8157b8f919c2187bacf95fd59948 +size 21181493 diff --git a/eval-results/hellaswag/10/ckpt_060/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd3ccff9d06829e0ddcab51e89ef3721bc1de492 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8cd8a7c96748d63ca8c000f95294645ae117d1af8a94c44af2eb5bd2953f898 +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_063/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_063/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aad0437d548f987ad14a418f9cb1659f756dc94c --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_063/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:013f2734dfc2a8d0b9f96c745a6628d52aecb1e7908031e4ef0f5fa1095366c5 +size 21181534 diff --git a/eval-results/hellaswag/10/ckpt_063/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae0c32a715d82b21c58795291f00960aaa63282c --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76629a88d3eda09530774179707bc5aeb2d4fbf2253b3c1bd88b659114b44f4b +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_066/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_066/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d381b15f6051950908f99cb9d1ccdeb4db38d325 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_066/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43384a8cae56a50c4a497299b663ebce6cd73edcf4d69d85327b1410566cf4cc +size 21180833 diff --git a/eval-results/hellaswag/10/ckpt_066/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90a30b6c5d7d4c632c636e4f408d8abba4c61113 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a53c2ced19647dddbc08714f3e1d08484f351520ca1cb2f20d178a4bdb665eff +size 2837 diff --git a/eval-results/hellaswag/10/ckpt_069/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_069/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..884e1cfba78b75f2aef9b80b6a35a9fc721a8fe2 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_069/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1754116409c7f53245457b23cef373b6577591b76f767a14edbb797f9a4c076 +size 21181312 diff --git a/eval-results/hellaswag/10/ckpt_069/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1dcc00cb14274c4c295f656e1be421324187ea75 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d779c2094f60c4ecb14315d1e21bfe90c4c36a345c585c3329f5dca627d61e43 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_072/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_072/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c584e668426ecc66fb18cb00abd57f0f8ba54401 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_072/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20f21d95f3db7bb5ff31fb1338ec87434901d137e381dacaae68d732b37d95a +size 21181956 diff --git a/eval-results/hellaswag/10/ckpt_072/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1383e1b88b3b002aaef6a9c8800c43f71d59c572 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b880660738b243e52254234cd9d18628153b727d2fe0e88d0d73d4f052b550ad +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_075/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_075/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4da73bd4c74136d2cdf70d4d14e63183545e690 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_075/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a701ae9ed86494a635426c99f8a8583cf678f5fbfeb1275e681c8070962b1536 +size 21180777 diff --git a/eval-results/hellaswag/10/ckpt_075/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d982e454ce43ea33d6f43344c69cbf256d39e3b1 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217d88f6e5fbe27a627fe28f77cda457b1c2cc49b395236a7a2201a96e8c6f7a +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_078/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_078/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47ce033d022f3f23960c96bf5b4b54001d67966a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_078/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c1c15154330d09f27c1c8c1d0c5bb061ed78a2c42f73ebe771277f82e34852 +size 21180450 diff --git a/eval-results/hellaswag/10/ckpt_078/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccda4b5f6b21c6db63557865abfd42a251857660 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d25ff118b699efe0cc44a9893aa931b868ea1e8cc557bb64384e73110ea883c2 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_081/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_081/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9fbcd91dc1c88aca27d423074de9484b3c6ede9 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_081/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f55b0153e46f31c0ca03a7f36ca8b0fde6948ae4719abcbf71720024db8e017 +size 21180674 diff --git a/eval-results/hellaswag/10/ckpt_081/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da5deeee6f37a65780425159fa7496f45ced7970 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5decf1e34a4530107aec00589725fdd256e9ff880cc96e2e7ca999efd4d125 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_084/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_084/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e025052bb8b83802de47a1921f76547b673cbfe --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_084/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896991143a4624da4cc78298bf8157efe54cbdb85f60f28f2bd26f1a0b5c4500 +size 21180828 diff --git a/eval-results/hellaswag/10/ckpt_084/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87e32f7d7d5968087cc0ebde0c05ac8d63125d1e --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e07b860ed92fa6caead8c5975a6543565e8b9852432ecc9bd1d26e17a7e10b +size 2869 diff --git a/eval-results/hellaswag/10/ckpt_087/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_087/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..256d586a8fcdca65664a3ade33fc05baa05ec4ac --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_087/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e9efc7de7d4b3e1553f8946f8fb46c39c37bf88518b66d5f9ed0bb873d45a9 +size 21181087 diff --git a/eval-results/hellaswag/10/ckpt_087/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2708edec6855bf5076c64faef86bcd90b35db597 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1764c6d504892b810c0902af8f9abcf53b27b97eb2ee0db780514e1ec9e5b62d +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_090/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_090/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec49ca050bc41e8ea6dd67da767f68beeadbfed7 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_090/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba49b01c2198ac398573d5bda14b55a9f51fb52e6550eed9b32c0e41d786272 +size 21180678 diff --git a/eval-results/hellaswag/10/ckpt_090/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abb8f58a33ca622d9152367331ea2054cb341f30 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58bb71069e05a2950e76358ccfc86999b3946843342af5dc12ea10696127ff0a +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_093/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_093/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df6324fe01794df332afdb04afe1b346005d306d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_093/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d10172fb2000336d09c99f4df61970b066bb48d32c336dd9e3686778e66392 +size 21180253 diff --git a/eval-results/hellaswag/10/ckpt_093/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..181ef6f0f76720b7916315f2903589fd86437344 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae61952ecbcc7dd6d2586e63cd79a257ef9bd7657f91057c039ff36f824ce6b5 +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_096/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_096/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd06a11dc54f7ae6ed9e4a85abfa7f7189e6cc9a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_096/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4f3b26229ede75842cfc95df9f5478f68d67155e97073bae877796dd8d30008 +size 21180078 diff --git a/eval-results/hellaswag/10/ckpt_096/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0305e979061fe945c41c055a88ed1d1890e98c43 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b04081fe1949a94b9056ce316bd168f096fec80382fffc2002d10e79187d4113 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_099/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_099/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..918d32241762abe644e9e94eb85dcd32fcaec43a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_099/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec1ffc04134fa72cbce4bc1f6d17df25a40d4a735e9f7232240c304ca3bb5f58 +size 21180313 diff --git a/eval-results/hellaswag/10/ckpt_099/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..282881db81b3d44d0c85c95f1f78f9575da3f937 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b49997850a00144e0bbe53d894e8973340a86b488e829321ee19fb3d19c7a06 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_102/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_102/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e811a7a56026ff047f3429b67ca6ddf619fc773 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_102/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd715989815a44a747dfe71056ffc85d46a048096fb82cf03e0a34f00511858c +size 21179547 diff --git a/eval-results/hellaswag/10/ckpt_102/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04c1e4d0157846fcc1568c5d898e9664cb4b38ed --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a199d6535a2cc38d3704fb83df997299dd7ec18c3295dab2d25e5c0aa15cbae0 +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_105/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_105/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2eea55d46598df759eafa01b6466e6c3dee1c32 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_105/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e754abc91eda39a8b9b3ee2cbf194338eae0feb15bdbb2e46cfca3b96a7656 +size 21180306 diff --git a/eval-results/hellaswag/10/ckpt_105/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6bcb1983b9c892b83779ba8e2a8509743ad01ff --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18325496d5aa7b5523c8cc7b7dc59632bd2cf277ea2b1c087c0f2dbb86426a1f +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_108/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_108/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4677e860a1693d45bad2d0b71870fa63e5306da2 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_108/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0517fe02004c37df7da15898385c6a6afdd9c006cb3dfd5492e0d02a90ba0826 +size 21179695 diff --git a/eval-results/hellaswag/10/ckpt_108/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f7bee71493a55cee5a06f9cde6dd49f018e39ac --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:497ce6379a3ce65e2f974b05ffba90531e9f8c4fd4faeef63f5f231a09ff3278 +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_111/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_111/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c539bce66b8b2cf2a2a4cdd1439ab43f7a0d3bc7 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_111/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94607ebd126e029cb66ae0d62a276e0cafd44e8a63716d6012c7451ad56c15f6 +size 21179578 diff --git a/eval-results/hellaswag/10/ckpt_111/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77e1027b968a6e67360108353ab3aa195a95ffb3 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63493b07006a2d79d0cc4f4a4a1fbfe6332b17b42c6a3df5e0e976729c28850b +size 2837 diff --git a/eval-results/hellaswag/10/ckpt_114/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_114/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d55fabd17a0f372152082f2bb32be00306b8930d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_114/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb0d83c8728d189e6c73afe446afb705f071a132008fa4833770a6dc91bf8db +size 21179691 diff --git a/eval-results/hellaswag/10/ckpt_114/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c95a941004b9dc002b0a84f83166c22e5aaa03cc --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c6ab9082088dfc73cd0878b3779c50689852c7464aa8a6faa9474650a4afbf +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_117/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_117/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a49fbfe4fa2a702127bc17022203ae38439b5564 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_117/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebe372b98aea26a986436d3901a73a021288bfbb0edc7c9f5e78d7c9a9486f53 +size 21179497 diff --git a/eval-results/hellaswag/10/ckpt_117/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b49f1c92be0d2ee89e4bfc815bd38f7af3ae12e --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cdea551a507a9d64f986fb8e6edd995dc256199fac87d375167982051bb320a +size 2845 diff --git a/eval-results/hellaswag/10/ckpt_120/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_120/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ed01bf04b3d6e48a582ccd7e51ecb593e4add11 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_120/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064dbcd0c879a0a90bd0770df43eead30fc8fc16ea7f899bde3c244d0e48a52b +size 21179728 diff --git a/eval-results/hellaswag/10/ckpt_120/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4979474df9fb9ad8c02b6a68b1ede8b63076ddc6 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba606760d39a99f99ba6661044bd0ed52e969f8c99e837105616fcefe084303 +size 2868 diff --git a/eval-results/hellaswag/10/ckpt_123/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_123/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1362d4ba64688fd9111c9412b4f1d2108f57c30 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_123/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26759c3e4db2338f72ac09bb732958c3f2568e2a7f923d67857a292e63e5b10a +size 21179250 diff --git a/eval-results/hellaswag/10/ckpt_123/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61f340aef6f967add557719ba0202cf96c057c40 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:784a6b5aef9b2a31ff7ee19078d3e874fa4fec642d45b8f26c973ad0b1e22953 +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_126/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_126/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..858f1209f65ca5e1647efa075e6541c2a273d272 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_126/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db6de0d925fc1d59154a1aeedfb281b30f210b75ced7db4b6be91586ea79988d +size 21180003 diff --git a/eval-results/hellaswag/10/ckpt_126/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dac5c5b19d37f8f3484b5430d23d0a75e3f6bd3b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2656a201f218c014654ca1abd33887e33195c351584b273c2350ef128a0110c7 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_129/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_129/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d4a2b299319541e19ea33d90d59d11f0481414b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_129/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:888b3d3d10aa04a837ebb2b837c7573df50e075864d7c60792efb11d5bb9b6a4 +size 21179668 diff --git a/eval-results/hellaswag/10/ckpt_129/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c1df7daf617a5dfaac0dfc1ff80293c930b6352 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9005588dba2fd1d102c026052c69f20ee508826a6de011f0525ae89db65d9ebb +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_132/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_132/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5277a22ba79fe952f89e950eb7f566ce5d0d62d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_132/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d416c78187e3685268905c03c95b5888a6f96e4a11ec40bae99bce31ee11aac4 +size 21179452 diff --git a/eval-results/hellaswag/10/ckpt_132/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c36f96e4293156cef885dce0c13c1187b759077 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b1fd471b5acd0150ca8ddac6bb485b9cb2d08c83be9f6f0e25df1759e37e6a +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_135/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_135/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eca1c84c821ac17c7c93960e4f7f73cbd8c9c867 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_135/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22b0db8624fa2adc97290cd1ee79492502461c26e5be8b9f4af98e92646c32e +size 21179369 diff --git a/eval-results/hellaswag/10/ckpt_135/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2dfbf7888d04034acfccb100af91f9b6e45ecbb6 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a29a215eef84a70ca554ff1920f59c54cf615e977453348de4bbff7d2988bc +size 2844 diff --git a/eval-results/hellaswag/10/ckpt_138/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_138/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0aa6695aa5798e1cdd14cf1764c02b0a54e84e3 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_138/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cc3d5639cdf52ba91746123e0b0c51ce36f050d91baffc5d438b338d5959845 +size 21179423 diff --git a/eval-results/hellaswag/10/ckpt_138/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb6a90f376f5a6f479203b44a008ad0f47c33d84 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4912c5e41e831d764ff5d049c4e2ce53bcaab1dba70839a19d912c2565d34e0f +size 2845 diff --git a/eval-results/hellaswag/10/ckpt_141/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_141/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e44d43866ef333b837be6ad3fac0bea1abc72ca3 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_141/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c8ae1fa6dd47d251382dd9550788abc21679a12c783d0c0831c7a9ce67d185 +size 21179540 diff --git a/eval-results/hellaswag/10/ckpt_141/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3894e64cf3b555ece367235d37d7f81b6d2c6c84 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d19470b0bcb98ec5aaecdd24a7dc70fcfa9f0f15fd1f612230e406730ca8af1 +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_144/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_144/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76aafedc25f865ee71583994a17a6854aa3f175a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_144/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644ff919d625214bbed97cecafc2f988d1c1c902c4393967f6337ef8584bd065 +size 21179138 diff --git a/eval-results/hellaswag/10/ckpt_144/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aeba89b7307227119721eca878a27f5f889cfebb --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfd00b949813cbbced7386515bb7d5b59d1ecb52e92a39150ff276fa2c58bf8 +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_147/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_147/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ba1d8d4ead316b44f78fd8e0c3724325b3c7651 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_147/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea9b0b827a8752b97c0e4f5f49665497ead2cdcba75a13d7f6753d46326dbd7 +size 21179051 diff --git a/eval-results/hellaswag/10/ckpt_147/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8357be3921d7b63dd0654b7d34311edb8df124ef --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72218b4dc6cbe42dce375278618fa11bef7d16142e401879f965c5350306cc0a +size 2844 diff --git a/eval-results/hellaswag/10/ckpt_150/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_150/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cb28c9b4164ec141c9495a57b70071a65108ca7 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_150/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d798efe3b5d262146dd76f4a67e033da74c0f93d8d755d793f8acb60d54b4e62 +size 21179519 diff --git a/eval-results/hellaswag/10/ckpt_150/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0aebe7433e19e3c3356fd9b544a35fb362b2f597 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed5ec427fdb4f6ccd837a77291b8853faad47107c71a2a8a2bcb9012f2afa42 +size 2844 diff --git a/eval-results/hellaswag/10/ckpt_153/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_153/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d37cacdc360b4a6c7bbcfda9be9544ec7e48152d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_153/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e05bb11a65ff30609e394ced748b3928fb6ba861aaaf0c8512a0a290f79fc1b +size 21179035 diff --git a/eval-results/hellaswag/10/ckpt_153/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fca2a3a0235fa99794689d7fb9d8917bf7585c01 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e63981aafe4e39f9e7055324944cb9e2ab30036944d549193bbc401f44ad91 +size 2874 diff --git a/eval-results/hellaswag/10/ckpt_156/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_156/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87dcd9d9ce81d8c95a0a3c18d59c24c69f798cac --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_156/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f4d08d572dcea0cda4cf4e3836a4aa96ba00504d4f32c7c36bdd55632cda43 +size 21179493 diff --git a/eval-results/hellaswag/10/ckpt_156/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54efc7f739c3c0cf78be286b9dd07950710ca5d4 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab02329c1abd0f44470f3d029c0e76dc4ba0b0d21afe9bfa575cc816c12c868 +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_159/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_159/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f00082c586d717f205eff477c996f50a57ed72d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_159/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e359a5b2a327434a1cf32807b88d37d3d389a62443a80eee8039c1efa5ca071 +size 21178886 diff --git a/eval-results/hellaswag/10/ckpt_159/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdb3100cb7b0c0e35ceed6b4d875f69f1f2c7fb6 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75cd62a2b6b1ba04242c7625a4339857a0a6d1226faa0253e49a20933d839c90 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_162/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_162/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a70ed19badc63d9b359c531eec17e8a2cc69ef8 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_162/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc56744fc0f109b933370bc7e195a6161f3ab6706d041000bd3d54a4a04016e3 +size 21178771 diff --git a/eval-results/hellaswag/10/ckpt_162/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f92d5d490da278abe4846d084cae24c4e88c99e7 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0133f5ffb152951b9ef01ec29329dc10afa732c2675a1faeeb79e8802639f5 +size 2837 diff --git a/eval-results/hellaswag/10/ckpt_165/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_165/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef49bc7467bcf9b672af1927d99c143017d3c26d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_165/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae39866a69a6fa2db5c307be55673e4a3a4f1f9b6379043939bc897d8714a3f5 +size 21178846 diff --git a/eval-results/hellaswag/10/ckpt_165/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..298b18cc23ca7c699483a1d6365304ae356149d4 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00dc3968edde9f6faa9749f31bcaa26e8522e24eb9dee8e8b8a4059570b10fef +size 2872 diff --git a/eval-results/hellaswag/10/ckpt_168/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_168/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8830854fb85fb9a0e108c09432b84d1791c96c4f --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_168/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a591fd3ddee807cad1da241deb1e72aef318241a26bfa9c71167e1cd310c557 +size 21178617 diff --git a/eval-results/hellaswag/10/ckpt_168/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..625375e9ccc9b8426677f64c5a9f92f5d92f0f30 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6c204e171fa6521ee90b6c17958d2decbc96dd59e72942c1b21b24980c130d +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_171/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_171/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbee3db1d5590b77fcdda9deff0fcac3b44d72fe --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_171/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b88477a2b6a5a2203fd3463928701d3c85ecd739413f20b23bb5ba8bcfda068b +size 21178995 diff --git a/eval-results/hellaswag/10/ckpt_171/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a03a24d10fae98f19fda67cf46c171130c6265d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c381d3f89fd2b46a425c903c6d2148dcc795a9251624bb98fd87ac9aa92fa91d +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_174/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_174/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..584fc066b0f32308b405036e8006a55fed199040 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_174/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e872f1bdafb4cbbbc7bb017347199484de2596bb03c9721dd4a3e1f5ee47638 +size 21178783 diff --git a/eval-results/hellaswag/10/ckpt_174/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96cfdcaf849c0ed978f5a4b840c777ef782589e7 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e315a789c3576af2c325224710e2cfd7dc765c05655e6a6073fc30db6d2e174b +size 2844 diff --git a/eval-results/hellaswag/10/ckpt_177/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_177/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ade268e56d2d47f953f7f0e0318457e610ff6352 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_177/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f28a04831fbb3a22d6d4dcaaccdc60d35e662dfa35f6db8ca11a5dccba259ca +size 21178308 diff --git a/eval-results/hellaswag/10/ckpt_177/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74c6314e943bba3cbb20710e3a9b981ad49e6ee8 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f04cb485d7d6598f60cda61def1fc3805273c01dd624861633c406a0a4a090 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_180/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_180/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..884b00ad44e1351d68f84900464d930ae5e6ec7a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_180/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd3f0cf2aa057a84175eb7c9dc85df883684f9d4f27ab319dd7a36f01e621c7 +size 21178395 diff --git a/eval-results/hellaswag/10/ckpt_180/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6dd69d020ad4e9780c750e331232c28cda5a903 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a857cb772286fc9757030cf1bb19ef7675c258e76fa003121517956385a7ee09 +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_183/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_183/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a70f8d7c19877685e04cc74404821123f049876 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_183/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5dafe2a040b58bff4676cd5b3081658c8e7483935ba3cb6103768eb541e8219 +size 21178037 diff --git a/eval-results/hellaswag/10/ckpt_183/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49baa9d6b7596e4683db1f011a8d7ef7f03b294e --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:970d6453ed3c0b61747455bde539354e03e0f11350485d18b9372148bc00f397 +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_186/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_186/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ec5ed215bbfd9a99b34166c725fbf9999e81a2c --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_186/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1c2efedd85a3433b165896b3895693609ab553ab347ad9604acd00a353043f +size 21178410 diff --git a/eval-results/hellaswag/10/ckpt_186/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a17b0676aea57809e6d020c681aa51ec1ef2ef81 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2b3759c4442d749656431752e01c8f6fbed63867bc0ec9531307a3c8965523c +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_189/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_189/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cfe16e595ed8d5de14732b53ac84a8c65a30456 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_189/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4fd13ba698d556013bab5588e899163335c1c491e26e6f7256271ae62ea0ecf +size 21178171 diff --git a/eval-results/hellaswag/10/ckpt_189/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d3ee4f1afe3ff51c0a3326213baacf516c62c7f --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4951c4f7a7715a7a656bb3d12ee1f7e7db45dc758c8e2241f23e0a805418f5a +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_192/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_192/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0dba696176d9eee63e2ceb19c94afcac356979cb --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_192/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c9d85c67683198d1ff1496913d2207fe7b23f703c4b4014feadd8fd448498a9 +size 21177935 diff --git a/eval-results/hellaswag/10/ckpt_192/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4018e0d7dc902a998482dece7e1d79bf4634cc3b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5119f6ae88a6b8737a023e90d3bdca868f6286a7ae761d3d6bc129e0d71ac192 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_195/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_195/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b228a708b1f6d21d60f177bb604ac42bf3062e68 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_195/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a1259195f132e00cd9ee9258fc37ae4c9e17fcc1d34ed93bfa57c646d3fbc80 +size 21178490 diff --git a/eval-results/hellaswag/10/ckpt_195/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7940a7c57b8406e26857e8f791b066c1e9560af5 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:671d452fd5188b5b473a1937dd5d1d4d8c0e28eb6d7734a4ba5e770093029036 +size 2869 diff --git a/eval-results/hellaswag/10/ckpt_198/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_198/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..619337c843bfa1068d1eefc04db9ef638e8b8e48 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_198/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60f6b6f469d9e2ef006c66889b1358629f19cc48f977e3cef4acb775b65340a6 +size 21178137 diff --git a/eval-results/hellaswag/10/ckpt_198/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a6963056a0fb352eeafa165db6fdd097daa059e --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb82bececdea4c7810f53974e71293c95726a66e5a2709c3a020bead2cf4f62c +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_201/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_201/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..baf506d977a24d44a641685cd1f092ec0df873c1 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_201/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a2ec069b5ea44eed9dcb1cb75a8f1ee32abd8b67ae22a796207fe3c076c2f0 +size 21177896 diff --git a/eval-results/hellaswag/10/ckpt_201/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eb90cd3ef1f2e88bf3076dbafd25b2321b40297 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c701ffd12accb8eee948db25d133e2b44640b0f95ce4ad6bbd22b279d38daacb +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_204/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_204/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a6749b23883e62ac2d777d29fea91e88045a332 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_204/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9d43ad16e78d8b1edc10613cc4dad0c5b8f94bd4820c512bcfd4955a548f9ef +size 21178360 diff --git a/eval-results/hellaswag/10/ckpt_204/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e65d1a4041406ca3a34ad85b32b3fb6917bdffb8 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14c95a5c1bb599729aa1a1f7303792c0552e9f59922bf0ddf4425f8aadbfec1 +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_207/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_207/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4855a3375405706850bc515559eea60933d37ba --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_207/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5a9eb0ca64e323e152ecd8cfd4cc47aa96784e57b086512e637e84a4677ea4a +size 21177839 diff --git a/eval-results/hellaswag/10/ckpt_207/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3908c0351722362c8efb49d34e78b026d0cee585 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d7cf1a09a8b4419da38f52f6c8087e0a73f69a34b5173dface9c0d4d0e97bf +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_210/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_210/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c839e7333489053f1a241269d87e48ed0d593e10 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_210/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c97117ce4db1d7acdcf77d70ac1e4c7c6abbff166d0ef1f66cb4791df1b0daf +size 21177971 diff --git a/eval-results/hellaswag/10/ckpt_210/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94602d802d9e6a98b4baf7d6c89208338c04a01b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c802e120e9c6fa162b152fbec87702ce996a77208265b0454f8a80c7c712dd0 +size 2871 diff --git a/eval-results/hellaswag/10/ckpt_213/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_213/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3249d2755a81aa1a8161b86e1c699f3914f2bbec --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_213/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd885cc563661baf04c1127993317536f75f0d0c40a47c52d2ff5de74d36741e +size 21177604 diff --git a/eval-results/hellaswag/10/ckpt_213/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..628c9fca16fafbae68fff403e04f9b93f8e4f9c7 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:732dd0188dc23645e771b36ee900772d8cc4d2e8d22ed14bc8a2388ad60c8f17 +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_216/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_216/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25c05460fd064109661ccdf702f9343f9ff1c799 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_216/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd427a60cd7b95be50319e85e8c65b9dbe38b9c2be631bbef9be9d78f7a8ff95 +size 21177676 diff --git a/eval-results/hellaswag/10/ckpt_216/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efe66a7a63face584ad812ac5bbdc49a10c7a1a1 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc70d731bb9b3cdd1b48064e7eb5a41d6f468a2998506fa172275f00473e732e +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_219/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_219/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..161468d9cee33e0c2cda65a8f473b23ea9e60926 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_219/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eaca518e05e7ab36ba5d2dcee823754c2a5292acdc9090913134aff0be185f8 +size 21177772 diff --git a/eval-results/hellaswag/10/ckpt_219/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2aee9921bd78cc25b24cfcc4e2f74e01a8f5dfc0 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e26a89e7cf57d87518c2454ef565fce132c71a655ab80d8157ee383e700a3a +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_222/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_222/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00a63696882061cb4dfb087ac39891a36bdd21de --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_222/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d9b7e073c3b60dadc02a5a53dae73ce4a084b838169dc54aafc81ce44929a9 +size 21178009 diff --git a/eval-results/hellaswag/10/ckpt_222/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd30fa30f8350137a098a2d417b1f4ffdcb64afb --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75c5b8c9ba1f92f17d3f868d6b01f2ac1dd79649dd028aa0a0d19ea9cb366252 +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_225/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_225/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3789257c1ace5705cfb3b6650b5d57e17016c1c0 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_225/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6bec9f4d447a89efe6985f4c6129501f7ffa0f8fdf76d51fc1da02bf6b6e79 +size 21178020 diff --git a/eval-results/hellaswag/10/ckpt_225/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3116031202d807e819ab35334a98a8715429bbc --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6638a241d9ce52374872ca572f6073cdceeafdeb537ab021a84029f1c253578b +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_228/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_228/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f9788767674d023ce009ad5f8ffbcbfe16e8f96 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_228/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99d21a42623b48e5fafd1c6c531d768fb49f2e26de456e38d9ca9a6baeaf84e9 +size 21176991 diff --git a/eval-results/hellaswag/10/ckpt_228/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ba88a007992ba7557b09acd8c71a90e917bff9d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b46365379f7829888cb7c2a9cf29a99624063b99728dcb1383db5f6051307c8 +size 2872 diff --git a/eval-results/hellaswag/10/ckpt_231/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_231/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..596107223dea545cec8296938a3673feb5b995b0 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_231/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd8f8e9764e80d7a900b3dbcc7d629186efd8614a7c9bf9f82df4289cbf7374 +size 21177652 diff --git a/eval-results/hellaswag/10/ckpt_231/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb5bb7a1e56f8167e5ccf538a04bfb1f2f6c0d6b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db24742e1e1d43c442b54873f3786e8eec809727481332f12233432f05976ea6 +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_234/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_234/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a6d32b461ed60b5948ec321d1c70da144e5f5ea --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_234/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69be46ec3c314fa5bad272de5bfdae41142c36a9ced4d40a45b19cb0ee5fadc5 +size 21177429 diff --git a/eval-results/hellaswag/10/ckpt_234/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..150f9968cc11b8fec6cb30412f543faa2696010f --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4376d9ce0957be253f3df70a55c80891f3feb1f2f47f823d65ab0d96777c76c +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_237/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_237/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6dc11ff94d85b69e7df3e35a2b9238a3730d2fe --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_237/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed4170c9013e74c6595691f0f41c7ee18b798a78abf7ea3611023d3d3e2ad2a9 +size 21177739 diff --git a/eval-results/hellaswag/10/ckpt_237/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e87c9805f61f603bdeae1eb515cf7575eacc00e9 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb4c63f40b22e38e58c28632d4686d186702e098f4dc11c5821540066249021 +size 2871 diff --git a/eval-results/hellaswag/10/ckpt_240/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_240/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f907007f892f17042a2a066f91b837b12543a85d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_240/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a0a78683cc008584629a8ea1b871ac14bc3bd5aa25fd85fef4fa6e3f0576af +size 21177335 diff --git a/eval-results/hellaswag/10/ckpt_240/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..720d2db7e071ba2ca946391f9f3432d35f7ecfc2 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5404979e3c1ce9f53c0ac8d20dd6919bdde6ef21b973281b18240d8afcb3cb7f +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_243/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_243/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..923c063cb9b2171f25ddfccff15097562b49a68c --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_243/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f33371210d105ff5792ee16e018ffcbb51f22a4c77998a7e0d66c348daefd60e +size 21177298 diff --git a/eval-results/hellaswag/10/ckpt_243/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e2d303fa3e93e9b5a755aa6ff9caebdcb8b360a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afd32df4faf11154253119544fbd032a08afa155b7edb6216b03e596e4114e29 +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_246/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_246/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc1ea3867216dd0090a502002fbd9ad13c5f7f2b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_246/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090d214bcc55c99ad38cba4cc6b16833b9ab3da37784f243978cb2b62885ae66 +size 21176957 diff --git a/eval-results/hellaswag/10/ckpt_246/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b80bd8caf3053343b22961998a5f743e32962b31 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84561d9e088e41cc498045513061528ad64177987874e9882f47ecd00dc61288 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_249/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_249/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e26fec08d218ee51feba2ee39e46c3db2d69286 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_249/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af8dfc78e334ea37a6604e6dcdf4a93cbaae9d8953c136206bc96b2e4da14239 +size 21177585 diff --git a/eval-results/hellaswag/10/ckpt_249/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e65dd9e1e22ce4506bd4b9d9786d788a31e9db75 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26604129e921c46610dd4193ec3a53c317f871840ff1ea89d3c2128ae637276 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_252/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_252/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80f683a81a87e36ede7f008cd2d412104a965e7a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_252/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20aec2fcd7b4546f3cdce07b77ac77b812f5260381914e4afdee5cd82e366688 +size 21177082 diff --git a/eval-results/hellaswag/10/ckpt_252/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85d833cc01b10992b970372a2d1dd8bf8ab78457 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab517d19edc56f7247616e33a88e87f60e8367f32881406725cb178ecd999d86 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_255/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_255/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e8257b4c2fae3df9dbb5d9fcf680705d6a2c39a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_255/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a1bb0dae685cdba1eabebc6061e964df6bb3c557ea5024e878510b45357bfe8 +size 21177163 diff --git a/eval-results/hellaswag/10/ckpt_255/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db8d595c4c1cc42b3a09bfa805408c098e636b23 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d37103ac54258b8e42df3a9d089b451ccb19034eac7598e1cd206a4fa45e13d +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_258/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_258/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8a707cc21181723a29a472ce491476985169a80 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_258/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cad7f7cbbd71704f8ecb794272a05f1d2188d6b76bcf82f7384d00e48093fbc +size 21177009 diff --git a/eval-results/hellaswag/10/ckpt_258/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8aceb05757a92c93a91242edf15714291788b895 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677baf1a811df10c9675f51ed5852e3564ea25355188d54940b04b431dd3b669 +size 2837 diff --git a/eval-results/hellaswag/10/ckpt_261/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_261/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4aad4e6072dbf6c585ee98dcae9207cd0a138ed3 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_261/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ffe2d5e74f0209ab49ed1f2f287e4b2357a601f80a473b97245cf5e7551c4dd +size 21177133 diff --git a/eval-results/hellaswag/10/ckpt_261/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b362860aed535a05206a76008850d0fe6f364dd --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4c6bfb96263c1e79ae5744028750084e73c27f3587b39f312451668a9f8ccc4 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_264/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_264/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a050fbf34e99c97b9d87ab617bdc8144df169d33 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_264/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4b640bd1b1de3960c31420dfd2b08c45ba283e531139d45ba9c03cc26a10aa +size 21177005 diff --git a/eval-results/hellaswag/10/ckpt_264/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ece7003ac99305b632836ecb2f97fce8a0ea3e37 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7c527a6229ec8edfd3bf1b1708320aac5a771d6a037f48d60969259262931d +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_267/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_267/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8bee05f64cfb2b044940b70501d67e38816d668 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_267/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61305645fb87905306f66b35675ee7ea70dc1fef21fd415de70a684d7a69a43b +size 21176629 diff --git a/eval-results/hellaswag/10/ckpt_267/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a37ee92d42f9e3c00873c7ad0ae8b90db725f784 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be967ca05b129dfc872ebe254fc34381efafaf8948b254d4cc2207cf3de3937 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_270/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_270/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58168fa51c85e653bcab5e300db122d24d1973d5 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_270/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a157f5cfc1d158c8bad6f0ba0796ad60fe18bdb57accd10f2e33e203fbbc7ad +size 21176523 diff --git a/eval-results/hellaswag/10/ckpt_270/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae7bfb6e2925be8994c0bf48f862a816b2711d96 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83763c314c9db991c8654342723cdf5f61d1912d8ba9cdfc45172e2336e945e6 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_273/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_273/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..608a495aadb0583e8c28c9ac0672ce5f0cbe0d02 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_273/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d69e2c252c07f5c3441002aaf0d5341fb2d96914299901859e99ed1d5dbe64e +size 21176204 diff --git a/eval-results/hellaswag/10/ckpt_273/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b22ec52f2e51faab3c0b8ddb795568278fcfbf1 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf283fb34c07185c29a0ce0aedf2b7cede712243e77407fb029145ab3a70bd5 +size 2844 diff --git a/eval-results/hellaswag/10/ckpt_276/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_276/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba907acea1caccaed634eb909234eb0fd89be946 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_276/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a1e43237b3bb13c5769ef154b9fd0a3b1a21f35aaa785eabfb52f5cc5d112e +size 21176539 diff --git a/eval-results/hellaswag/10/ckpt_276/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02b49bfe29d1aa7932f55d77cc02288133c59854 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80fd0c5d1db3cb48afce9050c044f63d2e397d1ceaff9e251481d83081939c0 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_279/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_279/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ea6ca292a0a8c344a6d384a487955aa297a0508 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_279/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e43ff8a015d5ceb1e6062132630432c1f683a43a65563e57e1eb7667035581 +size 21176886 diff --git a/eval-results/hellaswag/10/ckpt_279/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf5a900254412d9a0dee17fce4a24fdb681c8187 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2fa196eb590a365ea9c3579214c8727d37580144e69ee30c698529eb05ea18e +size 2835 diff --git a/eval-results/hellaswag/10/ckpt_282/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_282/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7b41aa3046d8440974bfdd9e1aa2006b35e1331 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_282/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdbd4a34b562f2054bb775f839e2033b33cb4b885e23e82744791eb05eddc2ca +size 21176833 diff --git a/eval-results/hellaswag/10/ckpt_282/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8a64e3b1409d2be7c88c43e40fadbe3c79ffa59 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8316488dcfdb4f3d44affed28719d8623d028630881b3bc438db0544b4b3eab7 +size 2844 diff --git a/eval-results/hellaswag/10/ckpt_285/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_285/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68556616519d1521a7255246754ebe91215a06df --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_285/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62d5fc7d35004046fd0896c6dea3f3e39467952c83843dced2c5bd08530db47 +size 21176815 diff --git a/eval-results/hellaswag/10/ckpt_285/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19bafb15ded61be0a7ab47123a42bb71c5d622ba --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2bfe49542d7cd2d08c79837817ff5280eb26533f79a351f191812bb491da9d9 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_288/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_288/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e9316377abfefa5269ae2f98ac75c8a075d4749 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_288/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4b0d6d6b47864b03dad893b81e236500f31bd61993e5cfbd1218de3f1dbb0b +size 21176321 diff --git a/eval-results/hellaswag/10/ckpt_288/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b2f1b31e4c352982b5a4450d10df15ac1ba5135 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a05b499ebc96a9f7f13a97d150ba295107d86df7ff225899c4d970f8ccf9954 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_291/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_291/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a56a6c52942ea729932003437b8e10bbfc818c08 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_291/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee4577f2abff801dba34fc1fcd64d27fe6b5bf8564a46cf6ae36b91564ea1277 +size 21176580 diff --git a/eval-results/hellaswag/10/ckpt_291/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32ba6f61a7107dc7be0f05d296a2c254b28e78ad --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ebec6210029a9579ea2129829bb16b44a90d80af3cdd9ae445d1b93774995d +size 2844 diff --git a/eval-results/hellaswag/10/ckpt_294/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_294/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11bf6d90904100cd931402768e431720f16c0d9d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_294/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5104a2c4637892929f2c6d84f0c2995943be36307b8a84a3ecc5cfd57130769e +size 21176751 diff --git a/eval-results/hellaswag/10/ckpt_294/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..363012b7ab7b810c2d66fe72b20d3d9a85f61e28 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428bfb2bb82f4ec86de80dcfc26c8ec6974a4b340f33a8dccddf0973b25a4ed8 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_297/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_297/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64711da5ef61e0f91d20b19138e1662f3491583e --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_297/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f04b9e1168b61e31c1dcdd4a975bd7cd48ba8e17d7a7c8b8cfcd5293b4718d0 +size 21176187 diff --git a/eval-results/hellaswag/10/ckpt_297/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1247eeb585581702e1afb3c420fbc4a4058ac6de --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6394c83810507cd2b909dcd4a9179373c1ee2a7f7260c653d5141e59e9b7e543 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_300/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_300/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5750c692eb0ebf2cd94e67ef56dd27a8ff043ba4 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_300/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a812ca572c0305c240a450e74d01a6b34842d0a3a1b68c5d4c1159ef118161cc +size 21176583 diff --git a/eval-results/hellaswag/10/ckpt_300/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6352e2cc0b44beddf391b57a158196bda22cb6e3 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0eb51832977fdae5692749cd1be2936556c5a29cfd2f08bccbbd286ad852a0 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_303/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_303/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8208eb330ebb54ddfaac038c6b221cfd9bfc0c8 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_303/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50f264ce5e8b0b4c84256e62c73f1a634da985182377cc80442ddf9c5b70eab8 +size 21176481 diff --git a/eval-results/hellaswag/10/ckpt_303/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64b4c84ee15c3b7f14ee4d35ad793748aa280e16 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da67eac015365bcff7fd51b70e09e2132b5f6845cbe9bc41e3afa106ac657cdf +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_306/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_306/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6589b9176e58d8a9a5431d751f957c74d65a37dd --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_306/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:645bbeccd8834fb1700734b17c6ff89c08abb78a6ca8809ff1db768374075ca5 +size 21176161 diff --git a/eval-results/hellaswag/10/ckpt_306/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50b3814a81065ed969f746e2e79bd882cc34b27d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9182ffe6ea8f4e6bb21614eb4e9cbe20d6f0d00de5e8b072b0c2562e30ed63f +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_309/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_309/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f4950629cec663c9b8b2a2a36ef807546c1aa42 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_309/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78dbefa4bb64906c75aea81e99902aa70b37fc9a8502b097fab23fcb48b7e8a0 +size 21176308 diff --git a/eval-results/hellaswag/10/ckpt_309/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5a1b7aa5bc2ad12e4b284f0570a92b0b32c7943 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a2549bda37abf2116e3c1a2509262d3f8ee95e9e6aba19c4518fa2739c764d8 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_312/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_312/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38b04596573661b422d11e6953f5b3a66eeb17b7 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_312/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eda290fbe0c13780d268515106acb9d5727cdcdba4dce69022fa46bfb93cfaf +size 21176609 diff --git a/eval-results/hellaswag/10/ckpt_312/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9cbe0897af3e8b3e069d0d341420f71f9f049f1 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ba734c15241af97de30f06f60d4ea2c50eff6e3f76a57cc0d22af09354d4432 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_315/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_315/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f524408939e2beb5b60cb7b9a6d7ff918c1e256b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_315/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793ab34fbc2c96585a945764f557d63cb189a01b295b18090d702c094e4c8d7f +size 21176399 diff --git a/eval-results/hellaswag/10/ckpt_315/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d0fc0a9dcf5adf50bc988451385b0efbdbb7922 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c42fddd48aeb33d6b6698a4fa0811c1ca9abcc02ca81e68d3916fe31f32c0ce +size 2840 diff --git a/eval-results/hellaswag/10/ckpt_318/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_318/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbba356adf9d3e94dfe27494ed7ebea36d610e3d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_318/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d138437d3985520d968fd7def5ca0575392ead58817b8520e5661eafc17da24f +size 21176434 diff --git a/eval-results/hellaswag/10/ckpt_318/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d86a40446be573364341e4a13ba577f2867f41d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7676d649b404c48d57f6485c388891bdfa8ea48971919085d10eefcac7b994d +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_321/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_321/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..599298479d85363e5af804b54fd2f65e8ddb687e --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_321/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16bcc9ab1c2f8370afb008c2087d0e1cc59e9944634fa8f27d8b76f66d6fdbff +size 21176653 diff --git a/eval-results/hellaswag/10/ckpt_321/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfa5ac4748da5dc17ade7cb5fe7705cc0ef209db --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e882e452548bf10deaec2ee067d513a4e70d6fe1114a6107a6c41585823fdf +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_324/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_324/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb07161d131d5b026433eeaed83d27628fe2c23a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_324/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13157947cf9370343a04d2ae1f0f84fd6fd9237960475ba4b41269bda487f9a3 +size 21176179 diff --git a/eval-results/hellaswag/10/ckpt_324/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d894910ce618065faa0e8191e48cc18baaaf912a --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc8d103fb9775dc82c8709f1a43c0886c8fdc315d350abba6258e54548d24e99 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_327/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_327/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41ad5d9f649f7c9cebb6e013e93b133e9eaf7062 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_327/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98610cbdaa4eadd2da3aec6686e25cd442a39fbed3d86142f36b0b70447c4429 +size 21176499 diff --git a/eval-results/hellaswag/10/ckpt_327/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbbe2c8d12f40baee3fc90429ff2d0981c90aa73 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:718b862b577460931d4b1afd55e2de7fd3cd2afda16956ec472db2002c5ae40d +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_330/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_330/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..561dd9a916d8d0fd70a4507350513247fa9f3066 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_330/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eca6dd1fc9cba5d010c9be2b13d98da3e794e80c2583f180ff61d125ee7e9d3b +size 21175755 diff --git a/eval-results/hellaswag/10/ckpt_330/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f02efd4c875869314a75523334a574f23d110918 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b78fc47893d28de24fd29a15d732936c3cd1d43c9c501fffe975c5a27feff31 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_333/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_333/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84a88645d6d31c2a0868fedfceef5a36f582cd65 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_333/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a89a77befec766c11fed18b7785aeb9c00c6950066a95658a524d422ab6803d +size 21176483 diff --git a/eval-results/hellaswag/10/ckpt_333/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a64a4996a7c9d02b92c2670dfb724b54d72536e7 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b151fecd04fcaa2d830200b287f7c3158af0a3c2ef833e0b1e2dbdc8a21891d3 +size 2837 diff --git a/eval-results/hellaswag/10/ckpt_336/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_336/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cee5979e84334e35dc0172225231b2d8f532a395 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_336/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e3ac6508cc769e0f166b697d49efe93dd59b74865b3c54d9b1edc23e4821767 +size 21176164 diff --git a/eval-results/hellaswag/10/ckpt_336/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c5cec62960b779988f6761823a4f2b54e6d838b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21569237337bd991ecefebea60ed9e0a418b784d391259bae66249c9a5b14b11 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_339/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_339/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ab76a29ba05261d92e2189a9aafeed709f9996d --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_339/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edf4b2f96b389df068658799f3d2602593d87b46627fd79833d2f0f8b8a1d12 +size 21175983 diff --git a/eval-results/hellaswag/10/ckpt_339/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd0f251eec0387c37c09ae98154b566bb7065fa4 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6bd35beca093711c10f04110a00292653923e3b820f150a5bdef485d5789342 +size 2842 diff --git a/eval-results/hellaswag/10/ckpt_342/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_342/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc1aad06d8ba94d2ee884bcc668c157196298a6f --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_342/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:720042fd138f5bf1780ed16643f93fb5efdd06a01589bc69ed325d1dc4c39dde +size 21176088 diff --git a/eval-results/hellaswag/10/ckpt_342/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7aef66e0b04612354fee0ddb4c6b07c6e700ee0 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:322a1d8870c2ee1c304f30591b1d4c4d545564633cbcedd59f369d8fc428fd46 +size 2839 diff --git a/eval-results/hellaswag/10/ckpt_345/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_345/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acea7d96065332f8721e58bb3dd14b4cdaf97c2c --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_345/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c42a892c3ab374fafa56787430c9e2538814b227eedfa73ed9efcb87330386 +size 21175859 diff --git a/eval-results/hellaswag/10/ckpt_345/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a87babea0b484ca50daa7bd4b9c1867ad387b9a0 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba9fe78e1dcb0d2e22b54c9937120330c693887aca996701f75778acdd0ae0a0 +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_348/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_348/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e25e26a2451c385fafafe1f4a5ba12c808b0cf6 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_348/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b737528b4b9369c380c0736e4fdaf87cf80b345de2a2d5e285a38b5808fc1a6b +size 21176034 diff --git a/eval-results/hellaswag/10/ckpt_348/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..849038e28b2659dc75972aa48d672dcac39a2574 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83eb7fe8657467e613973248b5448892a7ffdf8ba04e1f60dd26ca7f7864fdad +size 2838 diff --git a/eval-results/hellaswag/10/ckpt_351/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_351/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ae37169644f7a04d7643a1ce2310395e9c6c79e --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_351/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f470be98d52cbe4628fcb081778f68510b68b0b3dad637330d1b87c98596fd +size 21176551 diff --git a/eval-results/hellaswag/10/ckpt_351/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ac739feb000d3536e51a92b6f3a148803dfe76b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d65ea5070529eb5b937bfeccca5cb520581209c1e4abc753f26a4ac8089cbf81 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_354/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_354/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d5f5cf359c8f510fb89991f935c94042945bceb --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_354/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc3bc2cc9f9c0ee2608e797aa7f94ec96f08f338d355347dfca347c9badbcdc +size 21176367 diff --git a/eval-results/hellaswag/10/ckpt_354/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2623e35c1ce4f8ac484c90073b7d34239f72980b --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86afbf0d0587396f96d98609769aad26eed237392c5c9f0c8732530bddcca616 +size 2843 diff --git a/eval-results/hellaswag/10/ckpt_357/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_357/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bef04a3a9e8710ae7de6bd60572d39b8c5233b89 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_357/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db47bd77d0347c3891b42fdbcc18441ab6df2957f4cf048fc0dee6b76acb1ff +size 21175483 diff --git a/eval-results/hellaswag/10/ckpt_357/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8dc90dad2f6921e9edce989118c2f412f4d567f --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ed77f9ce260e5bf0088be4e464d4f2dc10bae181346f5707114414c5136265 +size 2841 diff --git a/eval-results/hellaswag/10/ckpt_360/hellaswag.jsonl.tar.gz b/eval-results/hellaswag/10/ckpt_360/hellaswag.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d5eb8255da494385946d45a2b3756c830d92ee2 --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_360/hellaswag.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1075fc674ee0385da15bbd17f4f5fa36162cc9da14c2da3b5b8250b038a2effa +size 21176065 diff --git a/eval-results/hellaswag/10/ckpt_360/results.json.tar.gz b/eval-results/hellaswag/10/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c82178616e2bff7bf74758cdf88532a1033a889f --- /dev/null +++ b/eval-results/hellaswag/10/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e71ab96273368e119ce0aa462fb9ae03fd0c74baa450ab9b4ef422970b65330 +size 2841 diff --git a/eval-results/logiqa2/0/ckpt_003/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_003/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fb8226624f1741243eb305b6efaa47e7fa7ec0a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_003/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa30e6b04d132b0faeed63ed3a6f21a702be1aaf93dd1529dcf7f6a7da8d617d +size 888572 diff --git a/eval-results/logiqa2/0/ckpt_003/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97353e52dd0e80b8901414ae855a5dfeea1b9cb1 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d0df7ac4622b2fad9511d056eb9febf228965ef72e37bcea20073f892e1439 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_006/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_006/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f47e0c5d52cef27447290e9dd7f339f8971ef08 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_006/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c6bb973bee74181113687ce88f1b86b5a77a5d84f6af71a614a80a39f06f335 +size 887208 diff --git a/eval-results/logiqa2/0/ckpt_006/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20a587453c619a8f28ad322f0ca18f73d4616f1a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8774cdc97956c0887d898eb0a047c56af24d9688da0d2344d95f121cb745f2 +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_009/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_009/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ece86f81b00b7199d202b5276c8277da2e08142f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_009/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd2a58c8da9777e05cdff098e8b0f704f7dfa248b60242ef92f6090a325447d +size 886972 diff --git a/eval-results/logiqa2/0/ckpt_009/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b21198546650709ab2a25e3a59750679fabf5889 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae40fa9245477f5c460f3d994a9e01ec91e08fbaae9a8a786220f547271879d +size 2863 diff --git a/eval-results/logiqa2/0/ckpt_012/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_012/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58ada871b1aee39880cddc3670a4e8e2304c55a9 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_012/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99016f8822a1d3f3bef2c7545c9b1f46ac48f6716c2cebb8fed8613aa01acd3f +size 886812 diff --git a/eval-results/logiqa2/0/ckpt_012/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e05a95572a7b7b9e6e1a0f02f911897f48e32f8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791c1260de9efd872d0a7c856c03dc74a149433a57cbb6a3aaba01f4b1594da8 +size 2872 diff --git a/eval-results/logiqa2/0/ckpt_015/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_015/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27aca6bf520f409156a9ae87e31cd8f3bd01adc3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_015/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a62afca1f85a68db54cb7dcab39e86dacad0e5d1cad916bc401fa024d84d4d +size 886908 diff --git a/eval-results/logiqa2/0/ckpt_015/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2f2b1d99fcb07abf7d14c402700eae308a36868 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3978f561337a2e2c097c0efa4eadfcb0df98ee807bd26a7c44e5fe8a6c351a +size 2870 diff --git a/eval-results/logiqa2/0/ckpt_018/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_018/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c3adeeaec744d2b1a3bfcb1f321677ed220808b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_018/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559a1221d9b1663e00c2f9bec81896e4ea2d3cd7fb04e0070b596d533aa3c6aa +size 887310 diff --git a/eval-results/logiqa2/0/ckpt_018/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2443c09400d7b8b0cff07571985d266cf91c141f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73b4eb6c8208e372272af3c38ebe5d329aecb002c77cd6f6ad458118eb91c00 +size 2857 diff --git a/eval-results/logiqa2/0/ckpt_021/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_021/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a586095906c40a44307183df72e2ff6764618810 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_021/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74e229442e361bad6ff5f4557af6eeb65902e67e21e9eddd6520375655e7b3b7 +size 887276 diff --git a/eval-results/logiqa2/0/ckpt_021/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..107c6f835d1bef7412a8a25e9cf8e80152997b55 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:718fb9fb92d5c5b7b8f9c920f8adc118de7ab454ed626c045226c8d3a777a07b +size 2898 diff --git a/eval-results/logiqa2/0/ckpt_024/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_024/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..417760e13ec59a7b832201b81cc2f912ffadb2b5 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_024/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a54f7e3e35a478951e4b02fad40a8a65fbe11556af0af6dd8917e966135f5ff9 +size 886800 diff --git a/eval-results/logiqa2/0/ckpt_024/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30f7ac71f11584e4f9c5aafa3c870fdfe7c3e875 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4d7f916b512878ae814f6a392a3387a672210ab8e9a1b1afaf900ea6e156301 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_027/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_027/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..658ab4a4ed360eab152aacaf6a93551c97c76f56 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_027/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c9a2750ab69eb89a4fe58e8b1cdeb0dedeea6043d22ccafeab80b359d4608d +size 886878 diff --git a/eval-results/logiqa2/0/ckpt_027/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..171d0aff9dca741f37a52d5af3fbf6ed0cde5f41 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73e92fc4bf55d782038117090cf7cd4d099cae10ae8735577424b982a5c8b47 +size 2870 diff --git a/eval-results/logiqa2/0/ckpt_030/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_030/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d58d713375d2d1c6cb71c8c937325314ba7b598e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_030/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fac376616b2871a89b4b78e6883a58ca7e237b6b1019f2e961d68e5511e97d3 +size 887218 diff --git a/eval-results/logiqa2/0/ckpt_030/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d514ffd94041599584fb1eb4e966520a0872b071 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbb94751d36f1b0522b669473cd2c9770ce1a991cce89f1bccc557fa8be1e7e9 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_033/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_033/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b232e46c7b8596b28d3e2afb93377bbb476ddd37 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_033/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c4cff2d0968a51bd2bec94bb3793bc6abf707ee199c1d54e4ac609a7bd82ee +size 886905 diff --git a/eval-results/logiqa2/0/ckpt_033/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..805e833e49a90fec6e712b569f9f0b41de2cdeea --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e2222a0fde0a7a3f3d4fe5007b827ff95a6829f9d89b4299a8d9955a8f3918 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_036/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_036/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62f0ba7dc569c26bd777372798c4d6f12d1dd874 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_036/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe08648262de43266ff6fc234a8f6fb2cd8b4363caa452c1fa0032fd3252074d +size 886606 diff --git a/eval-results/logiqa2/0/ckpt_036/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23d399277e1258d3fc52e09e671f73e0fed7b49f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e9cdf216196b031354d9e3672e3013a3f58aad3764753ab54f1ca67122c29f +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_039/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_039/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fcfb7290df9b9125356986a0581ef9e7b990e17 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_039/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f40960475288658606bf919068ef5ad11c685837e6b27eebada12ea7523a2d90 +size 886759 diff --git a/eval-results/logiqa2/0/ckpt_039/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ff332d147d3ae83cdeb8e8c4ea74509748e616f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e1ab8aa78daba4a20ee1e600df79a2acc5c01f00ca05f34cb92eb8367d21b29 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_042/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_042/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a4bd892f95aaacde92a9a205618458193a23fbb --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_042/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5c3419d53f11eafdd303b531581143d6c4e68db035e79323cc1d837a7f4908 +size 886728 diff --git a/eval-results/logiqa2/0/ckpt_042/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a529c7fd1305312d2b6d7f79a8679d8921728a07 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ca024b71bea22d9e66a6bac26e962e41c44415f62468042a0a585516734f935 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_045/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_045/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9bd4ee0bb8015095c5bbb87f9cd52925d06b6d8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_045/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6f700662d2f451159bfdbc3437a31d29eb358a3e0f3994d2946907f44e284d +size 887072 diff --git a/eval-results/logiqa2/0/ckpt_045/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb34a0f93590c686442efcd8a9baea195b9b5518 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed4bae5f506e5ee1c35cc9255e83053b1f341c567101ccc32cd389cab15416b +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_048/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_048/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8d9e9840addae74dad0a057d5413017b7796dd9 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_048/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc195d7824c9c2cc896ea0834d5dc56a940f7c3136a1eaac71cf2905ed369a3 +size 886811 diff --git a/eval-results/logiqa2/0/ckpt_048/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2ac141f044548730063dd37631750af59dc6a8f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0eba8538bd147299a2c11e9b5f7b6f30b45c02aea804193b8b192f39aedc5a5 +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_051/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_051/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1baa305eaf31dc714f6a8b50727e519d6addf3a9 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_051/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e18c619be83a3b0e3b7af3eaadcac626d9b1675fd8cea7d47d9ef0ededf62c +size 886710 diff --git a/eval-results/logiqa2/0/ckpt_051/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22354edaa5b735c1141a49a7be782fbdbaac822f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb079f332f12aa4d8f30f480dcff751be4719c8e827a2ee3a6f06e495852af54 +size 2894 diff --git a/eval-results/logiqa2/0/ckpt_054/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_054/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2dad6701b3dba3b6390508ad694318f12b008c2b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_054/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de40acb8fbfd69de82774126e4fba3fe74fdca00c9d704473a78c6ce6539c9b +size 886976 diff --git a/eval-results/logiqa2/0/ckpt_054/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a95196be8644e5959609078b230084719214bd5e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d0cb60b30d1edce716205b00f2d69de9437ebfba9b7659b37101ff82df6525 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_057/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_057/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d5c96b3440455d4d7e4d4bcef9687b9e9e03c5f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_057/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3748cff962634811923f16fa6ff1ecf371c6b4ae9deb4ae84f95078f9540d1e +size 886819 diff --git a/eval-results/logiqa2/0/ckpt_057/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1a06de460f23f2fbdae4cbcb699601b46b70fd3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e650e6b70a43dade540ed896a19b0e8aa7eebc2f55d44b374c93dd301eccfad +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_060/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_060/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..164d10de2ecdb9e5b4b8fa357753550d142fb7b3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_060/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2680ecbce4c15a177648974b515545422897fe80cb664c5dfa6bdb25a1c2fe +size 886799 diff --git a/eval-results/logiqa2/0/ckpt_060/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e9b80fe4aab010c9901a52dde69a0be8ecf9830 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caa379217c11876ca0780db18225a0d320781739791820310ab7cf6efa55296e +size 2863 diff --git a/eval-results/logiqa2/0/ckpt_063/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_063/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c1ee86e70631f15bb0e883e6def45de9ec7e3ea --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_063/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d79f69aad8130f1f7b5d2c78b4ad108b684d90de0968b076893b7ad2d158351 +size 886831 diff --git a/eval-results/logiqa2/0/ckpt_063/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31cd2b5980958e491f715e3090b47250cf4ab245 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4597f881797fcb876e0de72781cf37d323f65519b1a1a70ebd984b22dbf95972 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_066/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_066/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49bf266f1ef886cc41f82c8e18ea96c3b53c59b6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_066/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1153682f718991e448bac5db7c957aaa9fceb220015288603523fe8cf9f4fe +size 886790 diff --git a/eval-results/logiqa2/0/ckpt_066/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1f3a2d49bba19555637c2ba153bd697fdc950bb --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4355eafa8c549a5e24ef55af053c97c71f68b1dee1feb6a6b2aada84a6f11fc3 +size 2863 diff --git a/eval-results/logiqa2/0/ckpt_069/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_069/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0aa917719fa38cb4ce90fc222f34fd9cfd13c60d --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_069/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7870741f1520dcd74af17d3a8cdd0827cf95c1d250ebc0d09ae156098beda81 +size 886822 diff --git a/eval-results/logiqa2/0/ckpt_069/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cf21b7d7a71007090917502588d93a9af25955e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:419eeb095f1a888a903c981afdf71685f03105b2e3121ebbc7a7c8a5f5147121 +size 2858 diff --git a/eval-results/logiqa2/0/ckpt_072/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_072/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd551f825d489b0161f7c6f5b4aa2dea7fbab6b8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_072/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:721cf00a8c28fb1abbfcc7c0933f0db67bd7fea5926efc2e89f0946c8e9158e3 +size 886677 diff --git a/eval-results/logiqa2/0/ckpt_072/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2417d5ed2b1a15c4c61ef09e47c11074bf7309b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f972ddfc4966aec44389149ca238f82bdc83552d9c2e3fd8cd4bf699eb9c5b +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_075/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_075/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35051c2f16e98a63ce7a9355a0b97e5323f99344 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_075/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82790a8449b8bf6990f2d9f578ef0270adf61dee57fe695445ad8457e0a9263 +size 886748 diff --git a/eval-results/logiqa2/0/ckpt_075/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78a07b5f4bd60505998b759e6e0689c7cc6476d8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20fcc8a2e88f5eeeb8a76569c3106fd08e40ab2f95f5aa676d0934a30cb7188 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_078/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_078/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5303637b6f05a66114241b5e305763b3e365987a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_078/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c145bc3a9bc8eb64cc756377e3655684c52ab2c44b7094e4a0495995d3822bbc +size 886803 diff --git a/eval-results/logiqa2/0/ckpt_078/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd907b74da37f8b9337878631611e7dd286956a4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9427f726188572763176fe9932564c3d585804913b5ca823ba04afe0ffde8486 +size 2863 diff --git a/eval-results/logiqa2/0/ckpt_081/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_081/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70a425e4d69fd1477041d063051e7e079b239e8e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_081/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:856ee6b332aee09319cb26f7b79c5419bd83665b45b478a66a0052b358a4e641 +size 886806 diff --git a/eval-results/logiqa2/0/ckpt_081/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c9c01c53e1ae702fc97569690a710c635ec7c67 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d04c9218423d0b7f70322a548831906384b404f6f019ebb7f51d78e90bcd373f +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_084/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_084/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc69669d1df7e5bb41bdfc4f1330388ab109d887 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_084/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb99026220fc3642c048f7dc3bb6143261803bd1a2f8a1af5938b3ef678fe14c +size 886545 diff --git a/eval-results/logiqa2/0/ckpt_084/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbf941d9434a7092110ed31302ceb076504a81ef --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40d219586f67947b2bdb9ac335e478db1cbb1fe0e5bef0ddd2c8f9bf4841c663 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_087/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_087/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcddf4705c321a9699875a5701174d0000871c60 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_087/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1d88d100b1d422fd61c6cd6aaf25532c15b61037994c79f1938584df3379d51 +size 886902 diff --git a/eval-results/logiqa2/0/ckpt_087/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f02c6c4a8efe9571c0b8a2da4422d02e8c912021 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49adff0266af872cf0c4c696b22deda7ace770b2bdbcbe75f240aad4b742b5e2 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_090/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_090/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..147d311176fbfe7d73b30bf711157a53ded470f4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_090/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31049c479658f3092f3ff4fdf958764a4d63a31c39aff76015dd9d0a42281974 +size 886915 diff --git a/eval-results/logiqa2/0/ckpt_090/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b0667cafd1e487d29a987aa5418cb73b29966f4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6a81129b116c622b058f9bc5df9ee9029893924f1554e534f02d6be46bb5b5 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_093/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_093/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbb9a29c5ce57ff7bc79347bb8748d3cd8b815a8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_093/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45149e27718b5552ba02d4ed5668458841c075810b43b0574931c4f8b81e5ca1 +size 886896 diff --git a/eval-results/logiqa2/0/ckpt_093/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..451bb04453506f5f28b32ffcf5eb3531ac468df3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90b28b84ac1fd84bb7498b5f0b5787795fe42a35359db643448355917a4dc18 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_096/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_096/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a485b92a5fbb28cf063d6f67d61cfc111eda188 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_096/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:661b30a0bccbab39b20ee82593c3eb18f59c36fdeb6fc734204e9bbf835a6461 +size 886967 diff --git a/eval-results/logiqa2/0/ckpt_096/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27bb03c706578bf23f817e4e92d2668869cbf6b7 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e0305a7aa1e988f2057fd29c8760c5f0d4d752e103b4adc54df642142cecdba +size 2893 diff --git a/eval-results/logiqa2/0/ckpt_099/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_099/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6cd9f0b531bd81460659c3ac2aa7e9d985f951b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_099/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc2bd75aa34fd63ff4a7b8a9c5c83ba1b33a318c9e91bda94c00b98f401e3ff +size 886788 diff --git a/eval-results/logiqa2/0/ckpt_099/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1261f9ba53708d35283a16ece6a920b22235791a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9405e4cb44b6da78c063754b3e34e69817aa87c556ddb167714fa3e200db93 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_102/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_102/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c47fe635e29271abd7fd5405bc57b694a5e46f76 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_102/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c01f4dba7983cf22054918e18eba439155e8f182f2c3ee3927a5c104aca2d15 +size 886675 diff --git a/eval-results/logiqa2/0/ckpt_102/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35c41085fd35b960c192072380ac5ec3cc418300 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d7860e91f0d05d383333e4e2b5e8c5219bab63431ad77399c37d72241c1fa0 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_105/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_105/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a97d4147469bb72aed468432aaac8c84129b4ad2 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_105/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298a4e09aa9af1d1f251936b93fd035d131d204a91ae03acc4dcd24c637fca86 +size 887055 diff --git a/eval-results/logiqa2/0/ckpt_105/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8be3869efaacb9e29d271c9a05cd804a7b7a6185 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f93294204e21e5bf7c46ae911cc7f16376091a1f67815e5f902f76e236ab2d +size 2864 diff --git a/eval-results/logiqa2/0/ckpt_108/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_108/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c4636103a00edab3048d95108cfe248afab3921 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_108/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5202e529fd36f6e308dfb64223cfb3d1ddd9894b9efcc0f2f122382d554c666 +size 886783 diff --git a/eval-results/logiqa2/0/ckpt_108/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f145e771db4f7794fecc2c58983306f2f5d28c7 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a2c53ae27e70ef6a635cf73cad7a00e5dd62d267d7066392f9e5c7e4a1440f +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_111/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_111/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dba1a94d15625c6cc453f6bbb841de58eb4440d3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_111/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc44e965e4cc750120491b3197a853ef4a6222ecd882ec61d0818c30e839bf0f +size 886938 diff --git a/eval-results/logiqa2/0/ckpt_111/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ce90e46599ad3eca95ec6a0884484e1703fb5e8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b75867583a117a67cd453bc0333af877d4213f6a591dc4515dd168eacefa49e +size 2864 diff --git a/eval-results/logiqa2/0/ckpt_114/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_114/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..336dafaca251e86c59a7531dff1f7e350e4e0095 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_114/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ca2e2627965879a77b024fb250b339205579eef78a0b65a925221bf507339a +size 886835 diff --git a/eval-results/logiqa2/0/ckpt_114/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fd4952f5c0957eb446dbf3b377486700c9e9c56 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f2846a53283b17061a3e3bd4645a8d37002570cf155ef47de9808adc8dbbde +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_117/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_117/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9ce92f8f6780373698954222d6f0c975811bf37 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_117/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da329d54708a93122e95bafb6e1e2dc0e2a99701dbd5ecae036a920957974fc +size 886831 diff --git a/eval-results/logiqa2/0/ckpt_117/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8530db82f8ed17e3acb2ae3734e2cc4deaa8983 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae5af83f8f68a3c8ffe1d4e6e20856af6b4eac41f0edffea5373baf07f226851 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_120/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_120/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8390b90ec8bcd5dfca63c017bdf5ccee3a791639 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_120/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77065cfb85eaccda95fd1e2cb59531c8ca5988531d5d6d9a4064f41005e53f0e +size 886794 diff --git a/eval-results/logiqa2/0/ckpt_120/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5ab9fa61e11ff5c90d55fefd21921562374e7dd --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32ce5cfd2790e61df5b5cc911fcb91a02f0b1a8364e425c594103e56218228b3 +size 2899 diff --git a/eval-results/logiqa2/0/ckpt_123/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_123/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7e95d614ca6d88c0c1a3f9329ed449851b98eba --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_123/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e238ab281240544648adbb3d3762a7ade426f0c09b2944a132619ccc86eb8620 +size 886872 diff --git a/eval-results/logiqa2/0/ckpt_123/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcb2faf2cefe5a2def5aae867b94236d67d5377b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf1ae0edcf9735b13794d313b7b1312480a9fbab8d52ee5aaf2cf9a7ba32d3a +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_126/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_126/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d99f541581c10f61015040e964333cdcd110a8b4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_126/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edbf3a80e7349dca70883990814189526d9e5388fe0678fb59b5302131c5efb +size 886913 diff --git a/eval-results/logiqa2/0/ckpt_126/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..847b2da42b84aae133933ffde68a5076c01ff663 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36beb20581c8d06aa384427d3611b1b1713602204d82f4fc74d68db5fa3fd46 +size 2900 diff --git a/eval-results/logiqa2/0/ckpt_129/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_129/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c172a7fa77bcf74e399bc3e237879412c3053b6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_129/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3556bd8ee42e58ba170497c3023cb059f93fb0dd847cda60e20f2d699bf08452 +size 886705 diff --git a/eval-results/logiqa2/0/ckpt_129/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d4b7b77ce947757dcdcd5bbf2f20109e66d43bb --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7256d50216dc9bc760f9679b10d24e5ec82b349770e294bc66c389654cc2809e +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_132/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_132/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a80477efe90289f1d60fa1c73eafdbd88eb7c3a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_132/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84820e6c68cfda14d03c2981206d6d0a4ca8867bd903ffb43ebccb6c9f858264 +size 886807 diff --git a/eval-results/logiqa2/0/ckpt_132/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95b45ad69f7630cd761e394d7a01bc00ec2553a8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be9cf516dce45e9194e654335a4bda9368f15090e8cc1bbd0dd2ddd04ee64f5 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_135/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_135/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..206176891e38059721d318e146bb7d9a7ac91ca6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_135/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ea2cf0c841c0e31615926e75b053cd7066dee1734dc334a5539093a305cd4d +size 887135 diff --git a/eval-results/logiqa2/0/ckpt_135/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..809471b1baf5e9fd591a275f42e832abdecb4287 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5270e64caaae7e78d54936b9d6b1563f8cfeed8b8af02070dcf01b23f1bedec +size 2896 diff --git a/eval-results/logiqa2/0/ckpt_138/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_138/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce38d48502f1940e675500ca4b2ed79e7e13bb79 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_138/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874fc4e5c96c003461fb57f20203924bf77fb4356257e42f2713ff2e0dff58a8 +size 886881 diff --git a/eval-results/logiqa2/0/ckpt_138/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e4ef53c25d0221b703b1266fac23591d6fc3695 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88114b45e50dda1bbbe9464936fecc969ad3ec9e86e4f4710e06b3a9a9c4ffc8 +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_141/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_141/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..619b2b47781d61dea553cc4f892564571e595315 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_141/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ca2fae4121382bc748ca4bfbf1dfef815a8efd8471cebd1f2c7097026b8a8c +size 886737 diff --git a/eval-results/logiqa2/0/ckpt_141/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dec7e5b768a2ca4eea75e9da1a018d769c2290f4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e74017dce91aee1dcf28ba409226e8948cd95616949ff851d7b9dc9aa89543 +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_144/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_144/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78c9e8d17a420889b658d3ea3eaf83e407abe11e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_144/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:084385f31a3acb12d25eb3745f9ea28df2110ee79e7af66fa9bf23312c4d534c +size 886729 diff --git a/eval-results/logiqa2/0/ckpt_144/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f46246b6a5786591307ea66fd3f45cc9f28c835f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524c78c953ee4e6bdc0d5d3d1693ef0833b3d0b7d0a3483d4416ff26c50d541a +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_147/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_147/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de909d3a6143901beea1b6dcada6a00194c1e7c7 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_147/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925d374751db60f72a18547aff8e698ead2460dd9ae705606aeb475ecb3e29e6 +size 886960 diff --git a/eval-results/logiqa2/0/ckpt_147/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3db7c6cfe78f73ddd58023c02c8f73975da3d5e5 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae9fa70171bddc2149a059cd8c45e734c308ac1add1e76eb2dfb78dccdbb9c4b +size 2863 diff --git a/eval-results/logiqa2/0/ckpt_150/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_150/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec392a2695befdef85866903ae94397d3be71fe1 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_150/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98fe09e21dabe4cfeea919e5649a629680be2bba7df09c51696108de6bc2048a +size 886935 diff --git a/eval-results/logiqa2/0/ckpt_150/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa205c243c8c28084e4a7d928dacf730710ab9c9 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead6102c42ac7a82808a889104484eacf67655f95d5f9c21c83bcab57e92ef8f +size 2864 diff --git a/eval-results/logiqa2/0/ckpt_153/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_153/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a3af9d3c55eb4e45fb4bb24cd560073dd033469 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_153/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60616bb75d9ca89a97145cc39d3edc949baeea39801ef77546775aa9c1982462 +size 886693 diff --git a/eval-results/logiqa2/0/ckpt_153/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11c02046c38966cf9dfa9b905ac1db95f90e3c2e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d219c1cbdc14635670d08ffe4122fa4ff833f6b05328e284fc7d17930929f6a +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_156/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_156/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12d2f7aaf90f894100fe041b33203f22634169e1 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_156/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7214635be2a73c5c0e9a7d94e476a3023010f4b7201a745ec0eb55f5b193c1ff +size 886920 diff --git a/eval-results/logiqa2/0/ckpt_156/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84771b6af683933a20f8397e17f829dd0f68855c --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feefb54366c8a45cbd1cc192df0b1d552a3cd141002a6b6c6d4e29a8cad7f89c +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_159/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_159/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..543f66a6c06966b4ba3a917dcc3bf2571e3163fa --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_159/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9613435a8d915d9f1d5633d352d76d214b4858ba5588721ed7036b46c31fa2ee +size 887020 diff --git a/eval-results/logiqa2/0/ckpt_159/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59a1cafe3fc2bc249cfb778507d77b1f954821aa --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e9b16186c88778c63081b6e216da6007d0942d3f87b8917882f28e68a4cdcb +size 2856 diff --git a/eval-results/logiqa2/0/ckpt_162/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_162/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..affd4a13e40d1b549b19c62631a85d1b05c057a7 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_162/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a401bb4e3795008b81af5987c0d6f3d041ad923a51c6d5ffbe2cdc73bab9b650 +size 886725 diff --git a/eval-results/logiqa2/0/ckpt_162/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da454115f3b14464e2de2d58ccd435d2d47ab403 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1c8f1a33f52035606b766ce50ce61c4a050c36c94905ff3d3944c9e13482f1e +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_165/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_165/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2803824d07fbe1403060222f0c2b76d7539c0377 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_165/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843bbf2d7b10990c6c54a82341ba7df282d3d1f7ddc37d0c533785895df76113 +size 886893 diff --git a/eval-results/logiqa2/0/ckpt_165/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da1d34447b7789aca860eb410f30b3302590b7fd --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb08d7568004aa82f4d7d49093402cc2e4f0d6ab2cb25c6f0595b1e6cbfdfb4 +size 2896 diff --git a/eval-results/logiqa2/0/ckpt_168/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_168/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8eeedb4aa2d07a6314e0330a0bb8b7d94321a32 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_168/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be51d96c2d3676c8792b6a3212d79e3487a43aac6cf189afa3b616b09ea4565b +size 886999 diff --git a/eval-results/logiqa2/0/ckpt_168/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90e0c42c0347d68a5aa1cde5db3ee19a9373197f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c4f253cc9385b085826c69a80e3edc7e88338d3aab1d7141a9622014b3866b +size 2900 diff --git a/eval-results/logiqa2/0/ckpt_171/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_171/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72fcf5637995062f567e3c579b4622638bafd447 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_171/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4df00ee5d33a9afc8e4a9f1a463f97efc75ea82326963d8e8921e909f97b6e4 +size 886777 diff --git a/eval-results/logiqa2/0/ckpt_171/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9948437d57893397352d627c05f8d136f2a242a4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4aeb3696b83c98bd58cf8f67fd7672feb0aad078edf7107d1b3f85ef82febea +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_174/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_174/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edb3efdc30d2497d6f32c86421eb5c5ea92b4b8a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_174/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de9b4454d08f3271da35a33baabae2463b97a1fae63489a4965403c50706a8d6 +size 886911 diff --git a/eval-results/logiqa2/0/ckpt_174/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17502fd84a0b079f630873b3706b27774269ed86 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6e98828f22774acd5444fc6420c898a80e2be882ec7ed01e4a98c5ed302d75 +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_177/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_177/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d89b947863307249ede7ef26f1b54e0af1f3a548 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_177/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96b7249d2e147b21d093f19a444bb4c748439fa2aab19616d5d6e524a85e0a0 +size 886959 diff --git a/eval-results/logiqa2/0/ckpt_177/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adaec822b25a44690c73585235738de3294b9d1b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b0929fb3089ae1dd7693949e9b87a87aef72a3a188cf39cc3cb1ace6bdab28 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_180/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_180/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0420167bbaba5bebe988ce7c9c807ef5c710f5de --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_180/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a51e791efde41d6d568522a69669706603a28e475cbc8f4ab030d1e6c6a0f6 +size 887052 diff --git a/eval-results/logiqa2/0/ckpt_180/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6570c39f9be2a6d83d354e57c05122e6ed88d091 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98bc89be2397b92a6fa91bfaccea3202f1f77a7714ff5635d9dd8aa1419354c2 +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_183/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_183/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a03be79aed66e17ad4ceeae51760f141f95e6ca --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_183/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:602671368d23daf85b6805936b7a060b889fb87e8d7afff8f9e3415294c04ae9 +size 886980 diff --git a/eval-results/logiqa2/0/ckpt_183/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20e03840927cae37f1b7747f5533b283b9a32f08 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:566202c333134f20faf88bd286ba18dcff75975c94a775a10a6532449305fe3b +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_186/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_186/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0716a58c07be123b67ac64a18024c6d1bd92811b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_186/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8332743ee3bc7aef1f74d5a4e41db6a47d7ce1f9ab936227eb308abdfb5e0ca4 +size 886879 diff --git a/eval-results/logiqa2/0/ckpt_186/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14ad7262870291b09bd2ddf8345d06ad4c897f69 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6873cf38e79b59f576da0241882dff122dfad19ac2bc8b8c63455dc7e7fd663f +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_189/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_189/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..370caf4c4dfb0cbbf2a44d439b443d9ff23c07e0 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_189/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c486644ecd4b4de97b72f4f512df2b07165e909263089cdc0b459a24dedffcd6 +size 886694 diff --git a/eval-results/logiqa2/0/ckpt_189/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7856d56d1feaca27204fab911ce9864372136bc4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6383d234c685d5cb32cf4c0115beadceac08c3e6fcc81fc9a4e3d16b1867ea28 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_192/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_192/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2303c66fd7a9f5a19febe363187c5c464ff863b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_192/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd764e4b27c418606b71ee9468e10a10a5df5c30ff841e6597929750729a7664 +size 886693 diff --git a/eval-results/logiqa2/0/ckpt_192/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..457ab9881015890403c81cdfa34d53deae478f57 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:933fe37f790388949cc9fba711cf76ffea0ff6d493ab0334ac529403e08454c8 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_195/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_195/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7022141f6f7ea6f394d76a5b11ff21574e5933d --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_195/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ec9f2ee1adf159aee1e0d6b40c48e984a3700387a6c95c3080665db2c7fb9bd +size 886726 diff --git a/eval-results/logiqa2/0/ckpt_195/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de12c399f6b8841a6e7087be9c4f24b51a6dd7a8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9443ae362f0466cd439d7a3b723e9186a7d76c4ac87889e451bd2caed7a191b3 +size 2892 diff --git a/eval-results/logiqa2/0/ckpt_198/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_198/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2d124a921c868cb71479a30c7511fee7388cb53 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_198/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8ece48fe6983e05f1211977e078547bc279aedbebecc112a337564a5ea4c1f +size 886866 diff --git a/eval-results/logiqa2/0/ckpt_198/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfc208c73b68ff2b89f690779d67e17709daecb5 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25205bff4100623e42a7d72d482cd7a5a43abe77737328503f4f803b4563e61a +size 2897 diff --git a/eval-results/logiqa2/0/ckpt_201/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_201/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc09d08b93d26b73a699cbd8d62021bdc16b52d5 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_201/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e630c3b52c91f6ccd899eabcec36e60d169b7b73885d072114c84943cc39965 +size 886773 diff --git a/eval-results/logiqa2/0/ckpt_201/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb93d2a45d84528e32de7024e4f4a8ffc175118e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8d7feac2314800a026db2420ec9d034ab39b1b487e171ca852b24001ffabf8 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_204/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_204/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c59396cae70a0a499d2092f2cc3409bda08d34d --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_204/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d86fbd2d60ccbc4c5e6a4bdb5ee5617c398ac982c33a2a052a08f3feb72b42 +size 886772 diff --git a/eval-results/logiqa2/0/ckpt_204/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3122a4db0467964d0f43f2d5a529cf1210b8a96 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ea44149b528e2003cfe27b9257a70eeeb47f8703146b8111562974e509de07 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_207/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_207/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5dad17fc24444c25f2e15c934fafbca5569d51f2 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_207/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f7c5ac97ebf4b3281271ea2e2cc75db36f5758d6d51a77e94b50407ffc5cea +size 886723 diff --git a/eval-results/logiqa2/0/ckpt_207/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90c06d771c443747db97c6929f2d10969f00e710 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642577c56e2763ea72a31b7aa8ea55644c6165a111a0f66b97f0ddb960d5f909 +size 2864 diff --git a/eval-results/logiqa2/0/ckpt_210/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_210/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9068b628fb7c0525b4e75248c670df732d535f25 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_210/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a359d00c2f4e7c862fe0a6d4001927749790d6fc30b162e99fa5fb72bc349e2 +size 886598 diff --git a/eval-results/logiqa2/0/ckpt_210/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5a1477d48071fe2243d2068b101859d55bb98d7 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f00a9618169778209bf99d7c9e2d107f7d69524c5272785bc570c6608dbad4d +size 2864 diff --git a/eval-results/logiqa2/0/ckpt_213/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_213/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac325856fdfeb77b0c98d60960058dcb6cb60a3b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_213/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ec6e173a7f360ca4541d73f9fc96dc7add93543280b030fa2bc2ff87981968 +size 886797 diff --git a/eval-results/logiqa2/0/ckpt_213/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9078581ff02d03258fc10941e378682f132c868 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843ad03a8e81db2219b7f9835ea9e75e9c3fd80560aee3aeaf8c36e7f65a9006 +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_216/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_216/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c7eee2141c324ce79da28a02bd8a0fef156034e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_216/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd8263a6a4b7c0d8965d9c7778623ee901e1a9d8987da78ba64b9f0a74d034b +size 886839 diff --git a/eval-results/logiqa2/0/ckpt_216/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72cbaf467671c94d0a0941d63a9f729b8bdffb7e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff1721411d4be7bde80541dbaf3a9e74227477e920b6fc5a4dea6ce3522efbc +size 2896 diff --git a/eval-results/logiqa2/0/ckpt_219/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_219/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fa1c9e0792ad4fb4bc2a8a338c2caf7581b070b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_219/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7182d62462e93d9602a5b850df5e849d407d0aade9e8fc1136647d0960606ad +size 886972 diff --git a/eval-results/logiqa2/0/ckpt_219/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..741005a50603561a2bc493aafc5830a5419111f4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3e35c40b09157971b068bf8ec2d15a184ded7f5b4eede98361966cad53d60d +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_222/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_222/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be9cd997cc53e3cdf121dc177b01f4efb617db4b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_222/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ec72b69a2eab7776df4939646408234e164fcfb33bfb56618dd2114537c8bbc +size 886764 diff --git a/eval-results/logiqa2/0/ckpt_222/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02bf11747daf5dc05329dcf34eaedaf2db19b2df --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab9ab2e693c76cda3fecaa289ee4d480e104744cad453b848be12be8f2cbcbe +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_225/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_225/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d667b1c5a22f0566076f118941b4e4407f99c0c4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_225/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bad49d8b5b285ebe7745d2ab6ec0ab97aef7529421c08e5fb76e5c933994445 +size 886827 diff --git a/eval-results/logiqa2/0/ckpt_225/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..886e4660c466ca930183e48ddb045a29988daa0c --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d67088088287e93a14bef7b69ba0298418fc11eb52a3e938cddf3fed5cadfc +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_228/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_228/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f5dd47fc25f71b66335b1a00775a709ef508f71 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_228/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dde4a6b53cae879c6827c02c0da605ec7a73b03cec51e4f0cc3062deb3591e2 +size 886863 diff --git a/eval-results/logiqa2/0/ckpt_228/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7cad9d7a028b4a0a67175ae09c08ea5284e8af1 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73484af3e88ee01c2a67d9715d28a2db6d8dbb6176efebeff00be1c4a68d43c6 +size 2870 diff --git a/eval-results/logiqa2/0/ckpt_231/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_231/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5e95364f63db8cbae057eed81c19170c85c14bb --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_231/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e024ff1102b885b7f16b03f5e8ecbae158dd99a4d8c2672f33ce43018fb98ed +size 886764 diff --git a/eval-results/logiqa2/0/ckpt_231/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a35bcf38b632e2c6ad6b2a202561216915a2623b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28f4275ad620c58d84c4fb50f78b3a9ca90eee894b1890fe64e89133684c527 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_234/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_234/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..486d6dfd095f3b9f2c757273396d70f302e979a3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_234/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32f59fa55b316c058f3b633306e8059ce85b268e276b63c5940217d7cce22ab +size 886918 diff --git a/eval-results/logiqa2/0/ckpt_234/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f58dee4a4505af180ae31f4737a8194f63a83e9 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da59c4f6141bb973c03dc6dcf50942ad1aa1d8fe6068297a995fc8a3d08e99a0 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_237/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_237/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83fd8adb0363402c79b3fc33b00c6fc23392e7a6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_237/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44481bd2c383b99e81519e5a5188955463135c70882bbcffe70876a4c61749d5 +size 886644 diff --git a/eval-results/logiqa2/0/ckpt_237/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34921fd9842fbffb90a64e0250e4767af9ab9182 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c215516db50c8b86a31c54b6b3e5618757babb27a949fbbb6d7cd50484e35d +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_240/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_240/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abad524774f11c18e291b2aa89e944129445ce8b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_240/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:389eb9bdebd77dafb6b8994cb98502cbc96abb4e68808d8d96160817c6b0cadc +size 886917 diff --git a/eval-results/logiqa2/0/ckpt_240/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c936faf966a06aaca75e67eb7c43f2554ad038c2 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ec130189d8ac6a304a4d65bd2caab6a8f645113ffd245a20093304235fd07c +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_243/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_243/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f24e9a74701718afc862bc6bf9bf847f4b659b8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_243/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b534652fa1129b84bd7ea5b2ad1a90877bb03e5678358afb37c6cab7aa28083 +size 886828 diff --git a/eval-results/logiqa2/0/ckpt_243/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..faeb0f379a6b4568aaca40078a98790acd4bb0f3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f0f585bd25d8053ed897443be1860edbb3933f10748c74e623cfd9dd70fb456 +size 2898 diff --git a/eval-results/logiqa2/0/ckpt_246/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_246/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f16604f2eb4efd72b5250214f1d7d227ab68ce3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_246/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d13abaf31bacb79b578170986c9d28bc8ca88b1c83831ff86ca77fa65441da +size 886720 diff --git a/eval-results/logiqa2/0/ckpt_246/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c940464c401661defd350540595934304d54499c --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a273964a97ab2087ce94c07c9f57d602407c0f99f6e03844226bd3d60fe60cd6 +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_249/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_249/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3ebbb6588b00414a565954d1e5b01bdae569065 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_249/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9383c9d942df5a45d6406e1008e13991eff2e8c3abdefeb4acdb5e311e38d5d +size 886746 diff --git a/eval-results/logiqa2/0/ckpt_249/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe53884bdb6a580f15209c2e8eef7841a855bdb2 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9bd00fdfedc1f623c578638a8e14c0f5e3d5cb8436ddd6aa399c1afe497af7 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_252/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_252/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42d92dc32e1d6f9728c4f3a805a7904e00198c1d --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_252/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d33e31f196dc4783163e6b4180bc74876ece6417f11c7ae141aebb0ddc67025 +size 886753 diff --git a/eval-results/logiqa2/0/ckpt_252/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..289b2a1605277b5a0dfa7319b2d9dcecf1e46d76 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ded26d47a94c894144301463e2e65187c02c2aa085a3fbc2a0a1dc8f2fcac1 +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_255/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_255/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b714c85a9cf3c55a63fc9848280c5008ebbc4c30 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_255/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35fc806fc296050e6cbfd955df19b269491e68c32ae496e24dd5ccb14026410 +size 886793 diff --git a/eval-results/logiqa2/0/ckpt_255/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c333d5c8f5f77bfb9f1f8c4fdef4118aa299bc1d --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81541e9ae31a0d8121eeb77b000dbce6a09c7aae68a2b0b2ede26dfd38665f90 +size 2866 diff --git a/eval-results/logiqa2/0/ckpt_258/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_258/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4eec3fde8b5fdc6920d4672a8e8179ac6801bf30 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_258/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d1cfd94dc487c2a7ca4970588e0f9810729dca80674ee0bb00938868607feb +size 886917 diff --git a/eval-results/logiqa2/0/ckpt_258/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c508f59e14b39cb6a4d6a872fe1cd0a81fb9df4e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0060a763724ce44db5a2d7c88df4ee4fef9010f89f78b5ff0215ba6ecc5cb2 +size 2870 diff --git a/eval-results/logiqa2/0/ckpt_261/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_261/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..762a0ae457d2cfb43ca9af40276f50d886805594 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_261/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f947c22c6c098ae24a7a8c71ab4bf1175f930063a339c9c3003b00ba5f1bc6d +size 886793 diff --git a/eval-results/logiqa2/0/ckpt_261/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..505f86a2b0d8dd076fc282e6bca316d73ddb5869 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e8b8dc5f48e7b092148f5aa75c8c209fcb47ce8ca950d6f9a979ff2e579ac76 +size 2863 diff --git a/eval-results/logiqa2/0/ckpt_264/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_264/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c194189d823a1102d70941f94f7c639d6522fbc7 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_264/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2ef56b3cb5e20ea16b1067be153bf075cea8db5dcd0a60d8660bf4c1a1c24f3 +size 886932 diff --git a/eval-results/logiqa2/0/ckpt_264/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd5be1c9d8db2b34371f46d9a646a691ef6c5b4a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c544369796811287802f7f886f7efceb4abcdea86aef45ee9165c8170527c6 +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_267/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_267/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ae48d65eaee768663eea13efc1561541968d219 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_267/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5591fb013b7b2a65513e02c3a05e28b9aa5457a13c211e834c6727877341ba39 +size 886939 diff --git a/eval-results/logiqa2/0/ckpt_267/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b86ed079b624f574b7a14ae6c08014d395ba803 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca47b7bbe41a04e116165619470a0550b580813799351b59555f34fadaa9e57a +size 2858 diff --git a/eval-results/logiqa2/0/ckpt_270/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_270/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82489eb95ec6cacb697ce29acf3fb3114a53e0c9 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_270/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe519ffa3173830a74923912c2306f80cb0fef21d247386fbc2232c6bca0114e +size 886986 diff --git a/eval-results/logiqa2/0/ckpt_270/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d16865df4b98f65f50eadbc8543adb64089e1bb --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375ba58dde7ba3677791bfc193dd24d28994630e864a32bdeb768382a68aa270 +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_273/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_273/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7710cce9e979f743584d19e8ead15297260349d1 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_273/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8e0d7ac443d225f98ea2d22e85d094bdcb70538c5c4a9754c992f76adfcd54 +size 886959 diff --git a/eval-results/logiqa2/0/ckpt_273/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c4044b3f7d2a1a6c9e276784db2c562d03d9bdc --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9759a24fe17d811fa59d2d35c1868fd9a247e47f2b37b6f7c3a71423df55ed0a +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_276/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_276/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1636144e0d14da878f08fd224a0301cf2b4be347 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_276/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af7bb0c98195fb754306568b439d6118e113c2f86d3b8db17e89814c0c5ab519 +size 887074 diff --git a/eval-results/logiqa2/0/ckpt_276/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..062b938ee8090a1f5e556e2ea88d6a47c0c41f66 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e6f8cf7d19f8882b1a1e4b7c88587d0f872af3b78c1363a12f59ccd992f9a10 +size 2898 diff --git a/eval-results/logiqa2/0/ckpt_279/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_279/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3db1344e2dba6413e523b8bac6847cac2270b303 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_279/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d36f4931115a42e73fbb1732f7541b7c3fdefc33d2856e5bbaa174b431923c6d +size 886681 diff --git a/eval-results/logiqa2/0/ckpt_279/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27ada93203133b01759cefc47e84f02f32028daf --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7721c513857037ffe1a802d58d58313a2d09f8d9c3496ba55a80a07125177cfa +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_282/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_282/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b844cb1e56b925c0c21ec47d6dc24aee8ddd2760 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_282/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606f201c0a6b22860fceac363b8087743c359b45b394bd651a3a819a7d0251fe +size 886708 diff --git a/eval-results/logiqa2/0/ckpt_282/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7aef28a654acfab4df54f778fcae61c269c40b3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1741f827a92c917e5ecdffd91cf184a77e3c01b03ce133cdf205558480847acc +size 2846 diff --git a/eval-results/logiqa2/0/ckpt_285/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_285/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4625cff07b027a652f2913c8684e9cf9b3c455bf --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_285/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9e37ccc0e00e5b5f2c1925ce58148c6c27085a393fdb51fd9a2271b1cc7488e +size 886757 diff --git a/eval-results/logiqa2/0/ckpt_285/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c439f11900468afc527c6fea8e0c603479ec3a6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc3a27f5eb59fdcce0ddbd69647c37fc3141aa38f1208553f2e552e00aaa6b4 +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_288/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_288/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b4086e681d84dfc63035d2623ff8613fe65a810 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_288/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58d1e20209095a5f7e8da0a07a3f5e125896c5be8dd15fa8056cae55ba3641e +size 886656 diff --git a/eval-results/logiqa2/0/ckpt_288/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9db40fadca475c0df941c146d517d5e3bdc974cf --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f52dc5ded639baee98b9c32d1b565a7c3fc70bf727416c9f0ecfc0f31cfde2 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_291/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_291/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5caa56475a6417c274d577887374daabb4d0a2f0 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_291/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc4abc0cfe9266a6cebc520271bf315931839534ffcfbd4e7048b3561560515f +size 886990 diff --git a/eval-results/logiqa2/0/ckpt_291/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e61ba128b00de0f5d788ae961095441302431787 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c04c02bc20c4a524f94234ee10eae01bf228607dccd9587a7b55066537da879 +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_294/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_294/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5a2f6333a5074df9f19bf1f679a422cf693df85 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_294/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37ecd78f6c0e8303c8a97689b378d4850e7f96914e207996a65e7712caf7ae31 +size 886768 diff --git a/eval-results/logiqa2/0/ckpt_294/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef68cfed8300f59cfec3e3aa4d12d71abf723c6c --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17411c0559036cf527b023a7389faf0be13742737c1e8323ab22d3411882d748 +size 2894 diff --git a/eval-results/logiqa2/0/ckpt_297/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_297/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6a1ebda67a0a2ff7ed3289418c3ed3fe22f2c1e --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_297/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37fa8e4179e70c41880d7316f26497db8bfce3e2d70c53f9fa613c55df063d61 +size 886775 diff --git a/eval-results/logiqa2/0/ckpt_297/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70901bb5004478cd7f8024ccde22337a7f743dbe --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a26a95be4f564b612321e7bb6a61a770c73f354acd7b7e579f46e56c03c7304 +size 2864 diff --git a/eval-results/logiqa2/0/ckpt_300/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_300/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76a024c7862eedb8f4bc41b3f7a5982121e57bbc --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_300/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17f3198f64b2ada8a98c8c0a43f136d636cd21d3da66c10f2855b598aeed9911 +size 886920 diff --git a/eval-results/logiqa2/0/ckpt_300/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbdf8b132fe5470f384ae7d0264841299b7d6d41 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e701b6c5dd5861a2db4cdc58016c8766bed2a5a886a8b7fc6b93661447ca48d6 +size 2864 diff --git a/eval-results/logiqa2/0/ckpt_303/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_303/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b4d3e77ae676bd0b697ea3b7a47d333154dea5c --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_303/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4210c1fcc87981be128b7908fbffc07840a4aeee8daaf1bfbdc625dc50e960 +size 886681 diff --git a/eval-results/logiqa2/0/ckpt_303/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99b68d336fab21301f6dcf92954d76e5f6b8fab7 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b73ff253ac3c3e05c1110611c1ed72093dacff03863adffae3fd1510983d6ac +size 2856 diff --git a/eval-results/logiqa2/0/ckpt_306/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_306/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3c3a6a8412df2faac33dac1d0730c2d28f1c51c --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_306/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68497a237a87f321ab10784f7c08d3d0836343494d2a57534a38bd567256aec +size 886799 diff --git a/eval-results/logiqa2/0/ckpt_306/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb95aaaf106539c3c01c1608310ee6ce3c69e8e8 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4bdb6e6a618b16ab50a97817538916b74433910daf267e49380091ec14c52a +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_309/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_309/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6c8badeff928a60560feec534c36ba65bcb381a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_309/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd8695926730c436a5117c927fffe1a4245b08d1c55b32bbfff3d0f92eb2417a +size 886765 diff --git a/eval-results/logiqa2/0/ckpt_309/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c30bf44c3410967c47ca79c812bfeb66f84065ee --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3787921f39a321b9af1d4f3ddddb4f9af4fafebee621521d661f2cdd054abcd8 +size 2874 diff --git a/eval-results/logiqa2/0/ckpt_312/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_312/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae8372a10039026b9e66b1d041a4ade3815233a6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_312/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d71d0e08810627dcc8e9251ea0e9a71a27d068809b3079a0af559b08378b77f +size 886817 diff --git a/eval-results/logiqa2/0/ckpt_312/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d43fe3d805c3ddee0f0579635864ea2a78c40b0 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d386e0d81efff64fe8c8645c3854cc860f426e969eb7a4b0dc26e248ef6cf0f +size 2865 diff --git a/eval-results/logiqa2/0/ckpt_315/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_315/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c964b7396877ffcdf04b8f60c13a4115dd5dd578 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_315/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd395a6640eb670e7d7bf3465372d76ace8167494005d99c4757a23f43ece506 +size 886911 diff --git a/eval-results/logiqa2/0/ckpt_315/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1506297b518d2a5fe314c9eacae5f6126958b1a3 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef248d6b21e7bb5951fb8b68021671d4b65c4ec3f45ea0fddad4611710dbf25c +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_318/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_318/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26e5a72609b57bc9c82f9579465ebc03557aa306 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_318/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5505ee1ca23926323040ca83ab776a5d793824987fb1dadafff985663f4c9811 +size 886757 diff --git a/eval-results/logiqa2/0/ckpt_318/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6040d54ebbd3fc28ff63a3af2f805602e6b4a7d0 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d017e88f3e82aa9f1e778277d619633d2dfd883cd049688c03f9b292d54abcce +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_321/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_321/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0aa8ba8eefec71f12c432af8525d1cc3183ff5e0 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_321/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbfbfb63953224cb27dc98b1b9fc3e6812bd11de82a48dbbb4602f30cf6da830 +size 886949 diff --git a/eval-results/logiqa2/0/ckpt_321/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d92272a701b9bb39fc8f164d37640e3c4b0aaf90 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7d9947333088832fd4e81c4995f1eb04c4a29189e1dc7337f52eda41ff961df +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_324/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_324/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5384d8aed5baa2920096a66aa870f1003d83dfd --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_324/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381cd07935ad929abeb5149b25dbaffec87ea11efe7038aab90096b5e24bcbf7 +size 887095 diff --git a/eval-results/logiqa2/0/ckpt_324/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ff43c10f35f2b68883c2670910b4d0dac25d999 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f14350cb6e4703cbe5173734562795eb37ec14249fde6436ada74fc6ee25ad0d +size 2870 diff --git a/eval-results/logiqa2/0/ckpt_327/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_327/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3876f85f8fbe12aed36ffd44197ea59b8f2efa1 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_327/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e33a0cf48595d0041b16a879595e8a687d64c9717019a3979498602260fc3ac7 +size 886881 diff --git a/eval-results/logiqa2/0/ckpt_327/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..750fb3f3b298e38ce4447f5e461d240eeab5b0c4 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa3d062616821b8d861381a3392cb3530e13a3da630e89d110e31b61a935e3c4 +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_330/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_330/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a4a0156e584eb9c36bb171ad2cc65b5d4835e8f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_330/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e413961aa8ea06f4b308e79c8eff2adeee90080eafaa1238c3a0040eb5f61a0c +size 886887 diff --git a/eval-results/logiqa2/0/ckpt_330/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a2b316c38df3b5edafa11c96b03c9c7826fef68 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896811516fad212a64a3f9064c78c9db9a2f67d8223b2e77e5ba9433883c6997 +size 2870 diff --git a/eval-results/logiqa2/0/ckpt_333/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_333/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c66451417ce2439dfa94cd7a172a81411948c392 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_333/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8a44d3daeaeb4a2e3e25d6a0c7295e6329b75474a35d817c036bafde55cce1 +size 886967 diff --git a/eval-results/logiqa2/0/ckpt_333/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ef3b051e4e8e4cf350675eaab558d10ea3e5b17 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a21f3f34604b37e5291bb085baf2b8762a2c60afa475e5ee0cc23f70e4e51d74 +size 2867 diff --git a/eval-results/logiqa2/0/ckpt_336/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_336/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2c50e36632b6e447cfd4a69f60876a89b2c0e9c --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_336/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466abd7115205b81b046d580f221015fc32556b3b0d859474d494392bfbd2e6d +size 886979 diff --git a/eval-results/logiqa2/0/ckpt_336/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33f7ca740807c2a0e646a7676ff9a1fafa71ae74 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6c2d104c572044607de5ba46bed6260c9227eb5db4953301ba368ff3b3dc32 +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_339/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_339/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f676c738695decf767d81dafde4abdfbf90ec2f6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_339/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d22d88c9d5bb95e574c3cf74a3bd417f01cffa5460666e5c8759560b33cf99a +size 886948 diff --git a/eval-results/logiqa2/0/ckpt_339/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f70a833e923366baaa955766a1b56efdb745769 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c756353ae6b760f56eac98d1c942a2228a9e403f9af2026a4d1fdbe3e1a32802 +size 2846 diff --git a/eval-results/logiqa2/0/ckpt_342/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_342/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26b92906500374d42df934d19625061aacc4e296 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_342/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f728bb04e7982060b5a0de7c451cd11e1fad17c84d9a9229d2b3a9ca27b15720 +size 886768 diff --git a/eval-results/logiqa2/0/ckpt_342/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..915014f9d279078a91c4213cfa4527f5d3b06deb --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07ac91d6d88c9978824f6a2751bbd5f33e01a4ef5209fa659cc74bf33fa25e1 +size 2872 diff --git a/eval-results/logiqa2/0/ckpt_345/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_345/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4abfcce0c8756f5e22e82fb921ab7824d14804c1 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_345/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b4d1ca1c82fcd37fd0ac59cabc078d1e91efc57c8314c3a18f9855a1b416ca +size 886635 diff --git a/eval-results/logiqa2/0/ckpt_345/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1d50752d61ee0022e301e86ec7ad43312c0104f --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e178a9aba1e5edeee2cbdb0a237770c9aff7209e787b92257f8ddc6a951811c +size 2863 diff --git a/eval-results/logiqa2/0/ckpt_348/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_348/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90aa4580823432a927f8c5246f03d17a2fabb1f6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_348/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8184c8995be3860170821d40ac0980e585bb11f7eaa3763d99526ea544326656 +size 886928 diff --git a/eval-results/logiqa2/0/ckpt_348/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3a44bdc6d673f88c4b38c3249170c62fb855a7a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f1104955e00d11e0ed584b315bdf81978bb63d3dd09d62fc3b55d6007aaf2b +size 2869 diff --git a/eval-results/logiqa2/0/ckpt_351/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_351/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78e2fddbc45e3847e2ecd16072ff34cb65b4c9b6 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_351/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e327fa9b04b902ecd7ba2bdf2367457f0f11c9e953ac9f62f2cee2838695b1b +size 886904 diff --git a/eval-results/logiqa2/0/ckpt_351/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad920a3b2e563ab73e77f6f088dc574ad083518a --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b34e125727402dc2d3b1c823873fbfdc8b708578f2656f7b6b9a80b831eba7f +size 2868 diff --git a/eval-results/logiqa2/0/ckpt_354/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_354/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a077164be87f2842210fc18002637f7d1f00ea2 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_354/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4416d65d33b9f22cde67028247e1ea99ec27fc0f03e4b37fdaf0f99b23d12f1 +size 886697 diff --git a/eval-results/logiqa2/0/ckpt_354/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b23846f1d6417e46f09900bb928161a3fe8d44d1 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72da7cf4cde6f5b9740121a0aef05d4dd6b8442046c8f76098ecfbc1f53f6416 +size 2863 diff --git a/eval-results/logiqa2/0/ckpt_357/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_357/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93559163060259e743770277940f6705ed6f0f15 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_357/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b8929c059cf46de9dec7688ca998e0ea1e049077aaae8a8e692121e9bf727c3 +size 886808 diff --git a/eval-results/logiqa2/0/ckpt_357/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f201d6f2cd191402bc12200f848c6f8da937176 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b658c1f8369d64ff429f820220f0848e81fd5174973a78d5b380a0dc3262c34 +size 2873 diff --git a/eval-results/logiqa2/0/ckpt_360/logiqa2.jsonl.tar.gz b/eval-results/logiqa2/0/ckpt_360/logiqa2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0d256c9c3ed5b7e8519dff98bbc802fc94d848b --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_360/logiqa2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2502873abbef5a40b367dfbe4ad5f646182a26e95b03b02886677bc0464dc733 +size 886807 diff --git a/eval-results/logiqa2/0/ckpt_360/results.json.tar.gz b/eval-results/logiqa2/0/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91cd7b9e27642cda1a4351dc32c5823345f9f778 --- /dev/null +++ b/eval-results/logiqa2/0/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08f7a9d57cff34024df80e053b4e79b6db97f804090224a091a850fc14b3dea +size 2865 diff --git a/eval-results/mathqa/5/ckpt_003/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_003/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8557b3740bea44ea3f399787c9c42af5bb34858d --- /dev/null +++ b/eval-results/mathqa/5/ckpt_003/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f52a706beb8750dc5d3cb3b3616c0232caafd918450dd8728fbc3369976ab1 +size 2294808 diff --git a/eval-results/mathqa/5/ckpt_003/results.json.tar.gz b/eval-results/mathqa/5/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72ddc28627d48d48b8bcd4ea75eb2be89ca95667 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ec93116bb12859d11f099cd96e1002207754a351d991c4046fa5a1f04799f1c +size 2813 diff --git a/eval-results/mathqa/5/ckpt_006/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_006/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93bf022340539a096d020a5302290d63a3296ed6 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_006/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04887e871f673cd786c5241a57b7f639bcbe60004f4dde83ce4ea546e819e36 +size 2294937 diff --git a/eval-results/mathqa/5/ckpt_006/results.json.tar.gz b/eval-results/mathqa/5/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..515a9e29c1a91bd8e0ea27fb3a7f5b3ecab6ff12 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d9d14da56d77276b2bfe22ab3a4183de84f0cdd05e562bf71f9999cb3985e4 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_009/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_009/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24c849f3d1ad92bb54b7fa3697aaad954dbd01b5 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_009/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47733619a5abc247d6cceb262d4b7dbddb353d5fde4f816be97d53e89569c5e +size 2295238 diff --git a/eval-results/mathqa/5/ckpt_009/results.json.tar.gz b/eval-results/mathqa/5/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f56de72ef8f12a069262cb6446edbfc9b707ef7a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20aa2374717b9ae4dd70d2bccc4e14e4614ab4f9d89560c93287b5c3cb5fdbe +size 2816 diff --git a/eval-results/mathqa/5/ckpt_012/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_012/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a407f4470582c089ef2495175961998fc3ed0509 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_012/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e73f795e7e640ea4046be93aa2a30819fb663559cdaee6e443910ec4d8f5205 +size 2294893 diff --git a/eval-results/mathqa/5/ckpt_012/results.json.tar.gz b/eval-results/mathqa/5/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b69f875effbf11f21eea7ca2f704d3caceea23de --- /dev/null +++ b/eval-results/mathqa/5/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4140867aa55fdb7449e39acc25e6685cd08bb25e63b55606c648a777cd696a3 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_015/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_015/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ddf85fe8a8590c52d296f61623c80290275f8fa --- /dev/null +++ b/eval-results/mathqa/5/ckpt_015/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b069e68d380e0d7d2adbc53acf91d1c92b871fdf093b32e1ac2bc66eb959c69 +size 2294735 diff --git a/eval-results/mathqa/5/ckpt_015/results.json.tar.gz b/eval-results/mathqa/5/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e0335fa41489caa6ce2c698e544389afd0185ec --- /dev/null +++ b/eval-results/mathqa/5/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8349ffa2753e963c68bcc46353c2720701d04bdbbe46b9e3cbcbdd92d0c1171 +size 2846 diff --git a/eval-results/mathqa/5/ckpt_018/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_018/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59d36747fbd1fde39e60b02799518ad4be885bdd --- /dev/null +++ b/eval-results/mathqa/5/ckpt_018/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94bca8f3105feb576b03a43a5ff531a23bb2d7210f0bf3fc51a5c54b352bb714 +size 2295010 diff --git a/eval-results/mathqa/5/ckpt_018/results.json.tar.gz b/eval-results/mathqa/5/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8da7799eeb7a5636f6ed8edfeace2f840d3bdadd --- /dev/null +++ b/eval-results/mathqa/5/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ee431ab4d7b902b4a376811f8c0cbb7bb34efa6b219a276a14271d85b55a80 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_021/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_021/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3919cbefb85f1fc27b2742d22d129958b03be05a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_021/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203ddbded187b12e4d0eed826df60da0371346be11ffc088a1516369dabf112f +size 2294688 diff --git a/eval-results/mathqa/5/ckpt_021/results.json.tar.gz b/eval-results/mathqa/5/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae24bb549fbb007c21f4086defe0e133a0055d67 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd0072f87af4a36701f167541fb77a64d4b14f5a310452b7087c4648e02b745 +size 2812 diff --git a/eval-results/mathqa/5/ckpt_024/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_024/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03fa4f1b6ec38579e7d6216b50db3fa7649d29d0 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_024/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dfccbcec7bc5cf558bde953020fa46cbd36b81d42396a4ab26af74902978975 +size 2295363 diff --git a/eval-results/mathqa/5/ckpt_024/results.json.tar.gz b/eval-results/mathqa/5/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd2806770df829ce8fc503ed78dad0f23046260b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e68869e31b1715161b0b34a9877b6a7fe442f64caf0d59277b106dd3786750c +size 2814 diff --git a/eval-results/mathqa/5/ckpt_027/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_027/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f7952fd7bad09f1e1f093ddfe0df75e36fb315f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_027/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4866e888ed80efe4270d30e54650064a50a718b332ce953505711ae21471e4a +size 2295347 diff --git a/eval-results/mathqa/5/ckpt_027/results.json.tar.gz b/eval-results/mathqa/5/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32e75b4d10fce8becfc71716358b53253e833178 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd30ec9b76517553dd289239bf04d612a566ec825d4acdcede6dba36fd38bb1 +size 2793 diff --git a/eval-results/mathqa/5/ckpt_030/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_030/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa71cb30e079589a15c82e34c0b30c0d79bb7c41 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_030/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f04bb13003d738eadc319b253007ffc302e4027a0aadc341841ba757d996eb +size 2295833 diff --git a/eval-results/mathqa/5/ckpt_030/results.json.tar.gz b/eval-results/mathqa/5/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ba9a8844330f1066d4ba5b82f170bbb4780b920 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776e006a35681375a5322583c8b2a4fdf8308135cdf02023140cc6f5ff16ad9b +size 2815 diff --git a/eval-results/mathqa/5/ckpt_033/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_033/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86aed0016f6cd3b39a15ed1c2872f56d2ff0f7ba --- /dev/null +++ b/eval-results/mathqa/5/ckpt_033/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68519d2496ca4b4aa677b8dcd2216ac41608e9d34b2c45a321f218da09b2e1f9 +size 2295570 diff --git a/eval-results/mathqa/5/ckpt_033/results.json.tar.gz b/eval-results/mathqa/5/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff8d4d7620fb0615c677f4dbf6ab87592800fc2f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37dc6c1637f1c5cde5a1e7e983ff06e62ed720a79e13e1e501f6824a44f8504b +size 2816 diff --git a/eval-results/mathqa/5/ckpt_036/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_036/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..877a82377be720d14102a6c7650288b4b80abf3d --- /dev/null +++ b/eval-results/mathqa/5/ckpt_036/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aff089449562b0cebc1d2ff528e7593ac3ab08bdec5ce389e2fba54d136956e +size 2295456 diff --git a/eval-results/mathqa/5/ckpt_036/results.json.tar.gz b/eval-results/mathqa/5/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9343621314f25744cdc4fcfa000287dc1eb8bb85 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3cd88ea9e16347f3be5f2a133b55bf50a91454affd29caa47d4c397f012187 +size 2810 diff --git a/eval-results/mathqa/5/ckpt_039/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_039/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e97a9113207028e621c04f10c1189213523dbac8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_039/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cc61afe233b68c0c9b6f8864dc3d1c66dc2e01f0f8cf9a79e11837b38c782d +size 2295891 diff --git a/eval-results/mathqa/5/ckpt_039/results.json.tar.gz b/eval-results/mathqa/5/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c55048d0633f0c038e835c00ae8cdc8dfe2d5ddf --- /dev/null +++ b/eval-results/mathqa/5/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9071768ab5885e81f9b8eba5cbfe90b5520e0b78f1421caaeee6ee195380e248 +size 2812 diff --git a/eval-results/mathqa/5/ckpt_042/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_042/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b3faca63ab027886471a7dc22809b86a2b42fe5 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_042/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57bf62889c6dcf08faecad744449272bbbf99b8b6ab8d0da5d9e41bec024c97e +size 2295771 diff --git a/eval-results/mathqa/5/ckpt_042/results.json.tar.gz b/eval-results/mathqa/5/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5759c5e690b4e2c932d9d82ab5e40afcb7adddf --- /dev/null +++ b/eval-results/mathqa/5/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e1e17e8d19a4a1c0769d53e4e88d4c142fe8adfd47e23d5eaf7faa9097199b2 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_045/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_045/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5e9985bedd7765c3b39d1b59aad5d1c1005fcca --- /dev/null +++ b/eval-results/mathqa/5/ckpt_045/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d502eb683a1b0754016b85883f4c0cc80a58f851f3557f55198b6ac4433061 +size 2295942 diff --git a/eval-results/mathqa/5/ckpt_045/results.json.tar.gz b/eval-results/mathqa/5/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45aa2a3ea62b3755a1fac291982a8fc6b5714591 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95c872d593997a8b07987e0ae96adf96c2b69d5c91555af8781e519023dac7cf +size 2813 diff --git a/eval-results/mathqa/5/ckpt_048/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_048/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08c561334eaf2ac12d95b264aa59365656af9edb --- /dev/null +++ b/eval-results/mathqa/5/ckpt_048/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53207e424c0f10248c7d9db371be292e9de4433bcef0580f9bb13d4f1af597ee +size 2295941 diff --git a/eval-results/mathqa/5/ckpt_048/results.json.tar.gz b/eval-results/mathqa/5/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c23dbee287bcc2794b24095c6dba2ad4a820f686 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a23bd34057316c37790f1954d2028a606cea8960d380aacd41f2a7e77ef4cb9 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_051/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_051/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88c2ced7d59492b64d2167258e2dca6b02ed3d40 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_051/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676464337a1d161be6fcce59f9a9d38ee8c06b542aba9b99a54c46a14e9b0b45 +size 2295745 diff --git a/eval-results/mathqa/5/ckpt_051/results.json.tar.gz b/eval-results/mathqa/5/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1faed25eb2683f422826bfea231df31f52018725 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b5002c46369d3b9db85502ad09e25cfae64a92c3ad4f9b09c263dbb2adf562 +size 2817 diff --git a/eval-results/mathqa/5/ckpt_054/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_054/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42d01ab15d6d4ec1e8ab2c80824a7f27c4c35bbd --- /dev/null +++ b/eval-results/mathqa/5/ckpt_054/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cc035dde381557198083c1acf9c798dd647db6a17aca82f70cb209e9853a98 +size 2296023 diff --git a/eval-results/mathqa/5/ckpt_054/results.json.tar.gz b/eval-results/mathqa/5/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..557cd065da5f0185ade79dbe416598d92e68a3cc --- /dev/null +++ b/eval-results/mathqa/5/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e1a88a5938a4b2f82e840edb2d475d473fbaf0634d6a16fc231a71aff2081d +size 2811 diff --git a/eval-results/mathqa/5/ckpt_057/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_057/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97520d247dd04206912a2b5e4903fbee09299aee --- /dev/null +++ b/eval-results/mathqa/5/ckpt_057/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34cbce1437c0dce76a860425900632f6d31d5209061c2c5156c8bad99a3c4c8a +size 2296181 diff --git a/eval-results/mathqa/5/ckpt_057/results.json.tar.gz b/eval-results/mathqa/5/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..570e2f7a4f51b2a74e26c543303910ca1e95ab31 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85cee470c8737e0cf628f176b4523721f012c4eb721520cccb7ea637529f1565 +size 2814 diff --git a/eval-results/mathqa/5/ckpt_060/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_060/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..573ed275002706df2aef47f6c1d03cee4babb0eb --- /dev/null +++ b/eval-results/mathqa/5/ckpt_060/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5416c14d847fa1d0260dddfd43bfa523b37d277e151a295c3dc48ca3f94328 +size 2296375 diff --git a/eval-results/mathqa/5/ckpt_060/results.json.tar.gz b/eval-results/mathqa/5/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0b776256a25f63ec2f2dcd5ae077010bc5af8f5 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775a1e8f71ed69c150c92d5355188189cd3ca8e6e27615a6e89e910cc3fe6ccf +size 2817 diff --git a/eval-results/mathqa/5/ckpt_063/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_063/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9f6285c42d6221f3bf51413b2a65a65665eff94 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_063/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff50449859c713838d9432a2e63cd26f3fba2e6a56eff4669b39644c4d2372d2 +size 2295891 diff --git a/eval-results/mathqa/5/ckpt_063/results.json.tar.gz b/eval-results/mathqa/5/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01657ced4aa427d1679809e97df2f6984cf2ed97 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bdb25ec8312808e3fb9f64c5db1d5b5983611363662bafe587453b84592db5d +size 2816 diff --git a/eval-results/mathqa/5/ckpt_066/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_066/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9ed1f2594135e2677586d7866ce09d5dda3caa7 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_066/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:249afcd5efe9839de25a2236ac935fed5600552ce5df41579246ae46ce58ae7e +size 2295810 diff --git a/eval-results/mathqa/5/ckpt_066/results.json.tar.gz b/eval-results/mathqa/5/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c94998077b8f39ef950a51fa57e3ad3bbe45372 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8515ca62697a56a42ab1577900e95eb9e969ac7e6d303dd8829311955be98de0 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_069/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_069/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66b566356a614fa60cf4c943d89e9e753c5cf53e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_069/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57c2350ac208bd0026fc11df813d46e7397ac3ce27073e544174da73d0d47366 +size 2295782 diff --git a/eval-results/mathqa/5/ckpt_069/results.json.tar.gz b/eval-results/mathqa/5/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9ab1b66ec55a7c03a5c44e821598d09ceb9ad0b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951adebb94583b39f4ee8dde71d9746cd5761e75348ab37df936cd0b8868acaf +size 2812 diff --git a/eval-results/mathqa/5/ckpt_072/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_072/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b25194a6fe96c18b5e119004d54e9973cdb60a23 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_072/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4009bae5abda5883deb03950e2393d1a7b90cc276ebd55e7c402d4af30f9573 +size 2296291 diff --git a/eval-results/mathqa/5/ckpt_072/results.json.tar.gz b/eval-results/mathqa/5/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a48468b518f3555e5a3513f6f934cbf978299561 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a291a48c9c75d7c00812c77e877f669e020858b3b71f469a09f341125b8a5623 +size 2814 diff --git a/eval-results/mathqa/5/ckpt_075/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_075/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4c89f2332592e9a64d0f1545f6c122108fc6b5f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_075/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b4920c27117c02e35ad1fa5ac8d1d436eabb9d3c3b22d8db067aaabff36494 +size 2295803 diff --git a/eval-results/mathqa/5/ckpt_075/results.json.tar.gz b/eval-results/mathqa/5/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3247f7182fbf0764de1ae1b3020930efa0afa57 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d9d491179b45d07b8cbb077ab5062968d81005d243cfa4784a52d57d4e760d +size 2815 diff --git a/eval-results/mathqa/5/ckpt_078/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_078/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..235920968e020d5a541dde7f7510266d4d3513c7 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_078/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:871e41add80e88bcff6de863930ae117feb386c8b012b74b40df9492081f42fc +size 2295969 diff --git a/eval-results/mathqa/5/ckpt_078/results.json.tar.gz b/eval-results/mathqa/5/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3e710d9a3a643a94e737233fb5e2f6fe3fca83c --- /dev/null +++ b/eval-results/mathqa/5/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e2d3aad78a5ca5b9f61bdc2efc6b14b05c802921e55e21e3b44a099259bcac +size 2843 diff --git a/eval-results/mathqa/5/ckpt_081/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_081/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..614c7ed5863ce445b0efbfc5fcc428bb1465e1da --- /dev/null +++ b/eval-results/mathqa/5/ckpt_081/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0822eacf6529a2c6d5f397a1e213dd0b12ef953fb1fc3d517cbbf53792dd7d1f +size 2295946 diff --git a/eval-results/mathqa/5/ckpt_081/results.json.tar.gz b/eval-results/mathqa/5/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a8e33bdeedc947d991857fe139d731b5f503b6e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a91666e090aefe99dc1130b50a02c1df62122b126ff3238aab04a77e1b3a0ad5 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_084/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_084/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8913ada2a1b866c1b31b520f1b7e0fd9bab1fa7 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_084/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467d8ff881d2960d83a75e2e52b1d7289494f0224c895f72b0b730d15fdb7770 +size 2296245 diff --git a/eval-results/mathqa/5/ckpt_084/results.json.tar.gz b/eval-results/mathqa/5/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2bc82776d3c6e1a9c8480415f2140725ff9062c8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204b473b3ef223cc6f75716a8c13f6744df7e5b66740774f334de31aaf66d8b5 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_087/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_087/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..167644acf86c1cc10070f878c9c86c1d9aa37e1b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_087/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c25ee8ecc3ed4b07571e9dbbe6e7d6f308a68899efe1f9004cd1e1a2391f8fd +size 2296300 diff --git a/eval-results/mathqa/5/ckpt_087/results.json.tar.gz b/eval-results/mathqa/5/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47496616072632f266cd0614ba24841ed413064a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b71a8b8eafabaa6fb8f1e1d305b88ea3d8933819c465ea72cafdde1ab9ad22d +size 2815 diff --git a/eval-results/mathqa/5/ckpt_090/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_090/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2ec5b22b4a7549e9d6cf3ff03e9bd8469c6a73f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_090/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bdd1d6fb8c68e458239aa5065db9c4983bf8ba3dab1242739676e14ccf8728 +size 2296059 diff --git a/eval-results/mathqa/5/ckpt_090/results.json.tar.gz b/eval-results/mathqa/5/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9a92e0313c59585987169f4c3cea400ce10e5eb --- /dev/null +++ b/eval-results/mathqa/5/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9924c1bf571849e3cff4658aa60ffd0356b415f4db130280268df2d3597e489b +size 2815 diff --git a/eval-results/mathqa/5/ckpt_093/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_093/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44316471af91e4f2800d387bc23e8c3bae64a491 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_093/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ca54b14ce5030fbdc0e9be85d7c023e123e5710683c887e3bbdbb3389147ee +size 2296324 diff --git a/eval-results/mathqa/5/ckpt_093/results.json.tar.gz b/eval-results/mathqa/5/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d40978c84f295a2d76b2ff92436b540ab8084c40 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4190d484d3d16beab9b6570a3e2b3fd45596f849859536acfb14d5beedbb98b9 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_096/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_096/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d007be8ad5198716e0c58c86ca668ebf79b129f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_096/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11102eb67931d19687ffb48242a57a278288fbe565af0a02d684d6df99f6e7be +size 2296624 diff --git a/eval-results/mathqa/5/ckpt_096/results.json.tar.gz b/eval-results/mathqa/5/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37d932e07c6287e4f1856c74cad2a1f00f49bbb7 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:784cbacb0a9730e8f9dc650ad2dd98096262ef1d5c3eb4b6e172ff2b0dc44562 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_099/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_099/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..548a0a2492659d3565926e1fc97e59b82b8e512e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_099/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92d4d4931f70a6d4e6bf6cb8cde79893edfa34c85530aefc63a01d828635ed41 +size 2296448 diff --git a/eval-results/mathqa/5/ckpt_099/results.json.tar.gz b/eval-results/mathqa/5/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca89830020ff827ccf60bcb13fe888b1676fd8b5 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef12ba3441cd0ad14fda020f6057e0d254478f7f0358f3b9c409023a7c8c6d0 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_102/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_102/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4499529a90ef0e28544f6c5bbb0d3ac96414b079 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_102/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d833be5a12eb7033cb6f08d5c546a2b9487015d2f18437140f071319f647a18 +size 2296558 diff --git a/eval-results/mathqa/5/ckpt_102/results.json.tar.gz b/eval-results/mathqa/5/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9981c26da49e578758a33dbe64a01e45941b154b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4eae707ea86bd912e1f6cf8b5ba772a099cfa1780ad44212f7934ee6ce22c04 +size 2809 diff --git a/eval-results/mathqa/5/ckpt_105/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_105/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..000e7169dd73e74950ab83c611eb2ec13232f6c8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_105/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5754829c53cc37b31a7a3af0ffeeaa209bab602d86722c66851528947b277d2 +size 2296307 diff --git a/eval-results/mathqa/5/ckpt_105/results.json.tar.gz b/eval-results/mathqa/5/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59b440b322398bf8b4e842ddd416afe57f71ff99 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c23771b4da1a833dcc255a8c09b8707bdf8f6135e78804ab7bf84416c0f507 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_108/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_108/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..205bc1ce308f22a0a315b11e5b2b3bc45de712d5 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_108/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7496f6dc84c10ed892342fabb26465486c4ac4441681549ab3baef4e9c0a1422 +size 2296167 diff --git a/eval-results/mathqa/5/ckpt_108/results.json.tar.gz b/eval-results/mathqa/5/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28bca91e016978c08fa6aaeef892a3a91ccb93c0 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e27c09715303952c2313c3b2b73253dae39307c5db1b70ecd9999f15dc2f816 +size 2819 diff --git a/eval-results/mathqa/5/ckpt_111/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_111/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f6c7ff93fb81ce3477bef475cebc602f9e14f7a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_111/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f50f82b098249370cec8aaddb90501afd15c57b430d2543d3bf4d9a45742fc +size 2296188 diff --git a/eval-results/mathqa/5/ckpt_111/results.json.tar.gz b/eval-results/mathqa/5/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3bf684aeb1de115b50af825e722b21e360780d9 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2333ffb90d89aa8a0804fe8e4e8bf9e4708b08b946bd56ed54603d85ac69aba1 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_114/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_114/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9023c2ecb380f727f61dedadbd1a15f40a1e6da3 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_114/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b823ecce31a721750ea934b4bb7f07dd3b2f40c43b679b5c0d3cec8f5142b7 +size 2296423 diff --git a/eval-results/mathqa/5/ckpt_114/results.json.tar.gz b/eval-results/mathqa/5/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c96a6cb3e480daa13f6395675ffea55e2d8ebeb --- /dev/null +++ b/eval-results/mathqa/5/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9dc090e6128d7c7c9618deb2014022f11deef572fa38ec3370e031c96bd8fa1 +size 2812 diff --git a/eval-results/mathqa/5/ckpt_117/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_117/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14e94dd5e9022c4822ecf4ae6c232734116e5308 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_117/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4e0fa9ec94de0d53ea8e774c4a5a3636ffea5faf170a6e3df0871a94d762eaf +size 2296622 diff --git a/eval-results/mathqa/5/ckpt_117/results.json.tar.gz b/eval-results/mathqa/5/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0340d995724cc64f84f6ec5e3dfb3cef2e644f41 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab286ef8f0c6447f15334f190cf4cc2b17d19feffce3387191dcfa0baa3eb6a0 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_120/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_120/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50df46525b4de2f736dc2ab236fbb89aac24a8af --- /dev/null +++ b/eval-results/mathqa/5/ckpt_120/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e62a241419c0280ee723e893bc54f7bc392348050c1dff999c49bf4f43e7f2f4 +size 2296560 diff --git a/eval-results/mathqa/5/ckpt_120/results.json.tar.gz b/eval-results/mathqa/5/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2bf17280b5938cbf5b8709643f4b7bd86060a7f5 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8066390ba45b90397fb39d0895c2bb48b7a477378798258f11d3199420004cec +size 2815 diff --git a/eval-results/mathqa/5/ckpt_123/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_123/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70026f514fbb1078e2f90bb6d97e74f73ba382f8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_123/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6503a020e11548ebb43013e1d4e0b1f894a5ab4af974a9fd438abaf92c704a87 +size 2296851 diff --git a/eval-results/mathqa/5/ckpt_123/results.json.tar.gz b/eval-results/mathqa/5/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3163ca5e0a0236beeb9c8b04507fce1fc6824a6 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0d6bd43d9cf7c2b47ef1e68fb74be72c7dcb0df71e0ce13d051f2dec474974 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_126/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_126/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c240dd7a14d49cc9fd7a0107875150a7a7c8a770 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_126/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf50e7d03e12c158fcaf2d1313229371e0d446e8b90536ed79566e467e5e1392 +size 2296780 diff --git a/eval-results/mathqa/5/ckpt_126/results.json.tar.gz b/eval-results/mathqa/5/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..560ce247a5692ea3beacecd3d570e8d62db4cf31 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8275d10f23eccb95c3b66e8ae4c2cb1b57aa2af0492705afb440a0ff2b92d737 +size 2810 diff --git a/eval-results/mathqa/5/ckpt_129/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_129/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13550804fbd817275d3ece52fa0c46afdc08b506 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_129/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7efed133a0ac3887b11dfbf475968ecf76b5f8a835cb6299bfdc6bf93373a684 +size 2296447 diff --git a/eval-results/mathqa/5/ckpt_129/results.json.tar.gz b/eval-results/mathqa/5/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46822b9bd2eae3d633e7bec460fb3aae84451b2d --- /dev/null +++ b/eval-results/mathqa/5/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24de92883e5b3a7a4a5e600686d3e5c9e1d12fd88b0c834716d20124abc0c409 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_132/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_132/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3a5f36a8206d706d39e2f9f29510aa4abf3ade6 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_132/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d552f2a5dd152dc4fff96ac967140b9c5e0eb3dce98d3d3557b9ad2a3c782525 +size 2296836 diff --git a/eval-results/mathqa/5/ckpt_132/results.json.tar.gz b/eval-results/mathqa/5/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b059f7e1786926afc78cee0b776066356a268e92 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ba90d22798677cb1dc9dbb1660390f955d73c900dfcdf5f9bd5c702150a61cc +size 2814 diff --git a/eval-results/mathqa/5/ckpt_135/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_135/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..892ce9bbd427beb6fc34a8599b80a042c8d51d21 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_135/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d99bc325b98baa98ecb7f94f349e9c560b7314ef0a065db264b91044d59b3e +size 2296385 diff --git a/eval-results/mathqa/5/ckpt_135/results.json.tar.gz b/eval-results/mathqa/5/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..810f3af28158e19c787c26756612ece709504614 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b07cac0c833cb82f1075ae4df9a2755f62d5a3aa72eeb49d3b9ddeb819b16e98 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_138/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_138/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecf1faa02b30063dbe62cf719e5e70d8e4af226f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_138/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32a0d94e05dce97d324eca0d3b91b45c6b849208f2ab063e601a0df5c8e2e0d8 +size 2296387 diff --git a/eval-results/mathqa/5/ckpt_138/results.json.tar.gz b/eval-results/mathqa/5/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b2c36ab9b6d47f44384a90cd496a57ce25eda9e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f965bfbb1708e0420bc1a1a3338271ecfbf17addbd4738031d8ba847c14de6 +size 2806 diff --git a/eval-results/mathqa/5/ckpt_141/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_141/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08e18c5b0857421392c146ff9e1af38eac906de3 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_141/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf9a76f6c404d4425d088ac5f5ce133f316ebcd097dd8d334767e01cf52a330 +size 2296544 diff --git a/eval-results/mathqa/5/ckpt_141/results.json.tar.gz b/eval-results/mathqa/5/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78550062bcbfd59b45c69f5eff94533673de95cc --- /dev/null +++ b/eval-results/mathqa/5/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d92166fd7b69e555121cbaf2fce4296af83a569e71975c58511efad4013197 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_144/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_144/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86ac36a1335c27af4fc5c62305421c4f1e593138 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_144/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f494acce550207310c5b479fed9e615bdc9b1ea33a7bcbcbff2dede7db0f1f +size 2296844 diff --git a/eval-results/mathqa/5/ckpt_144/results.json.tar.gz b/eval-results/mathqa/5/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdca38b6bdfcaf2c1559269bdaca439f154ad35a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfffb13f0082787710cb236594597fe374290a43fa561cd0d2c94ef123a44d04 +size 2810 diff --git a/eval-results/mathqa/5/ckpt_147/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_147/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7dcad250ebbcb49ab91fcd5e210644fdec68e4e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_147/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d966e1dd962adc83b02e5a9cc1e2d84f1b94e6e9301ba9e278362f4625261c7d +size 2296638 diff --git a/eval-results/mathqa/5/ckpt_147/results.json.tar.gz b/eval-results/mathqa/5/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3b0e1f597849c082b2c36c9eb5010671f7cfeab --- /dev/null +++ b/eval-results/mathqa/5/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395599361b8e563b4e5dc206a0e243ffad36923cd6395d038e9718899b850545 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_150/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_150/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12a6a758498bb4f39ea53faa444ec278e5424abd --- /dev/null +++ b/eval-results/mathqa/5/ckpt_150/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e6c906e07a78d4e5deb877b04827844711661ef1ba8a376d5ffd08ca062d5a +size 2296826 diff --git a/eval-results/mathqa/5/ckpt_150/results.json.tar.gz b/eval-results/mathqa/5/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99b44058366895d8e37abb31332aa5d814aa1061 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b50d3fcc042a52f7a1bb1c1c37979624c7fc193fae6d1a373f43c24cc029231 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_153/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_153/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..406392f15ad6ea85bced2586034b92353d971108 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_153/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691c8f6a58fc48093362f64a565fa2e5441fe6145a5c7c8eb55e24f0be2a04f +size 2296500 diff --git a/eval-results/mathqa/5/ckpt_153/results.json.tar.gz b/eval-results/mathqa/5/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78f05e8386b5fae7ac204ae48684f9a083b4e84a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c878105b4241a55153fbcf3e2c57640accf047feb282633a04710c97c0372ea0 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_156/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_156/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0d70ba7aed0daf1e0014a5b4756f784c9234b6d --- /dev/null +++ b/eval-results/mathqa/5/ckpt_156/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659490509fa3ef61531e33bf54ff7d0cc0ed2344f7472a7e6af8975dbe65e3bc +size 2296330 diff --git a/eval-results/mathqa/5/ckpt_156/results.json.tar.gz b/eval-results/mathqa/5/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ee60b727b83f04443b37afacd9b89ab8dba5b82 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6656af29831e0cee79aedaad642de59c1f4ae5a488b62b36d32ea7ba1e9839 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_159/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_159/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c977585c914c5748bb1de7531937ea47d77b0f8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_159/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885df97a4bb86da6b3d428773cf66f2f2afcc38e678af99706fcf35002d8bc3f +size 2296635 diff --git a/eval-results/mathqa/5/ckpt_159/results.json.tar.gz b/eval-results/mathqa/5/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8641fe74548ec0720be8799dee7df7373ec3c5b2 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6389a6edfa4936e69ee8a9594d0124297cef4c24e64990e8b0efddcfe9470370 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_162/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_162/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b9e53575f1ccc7539dbba47cafee6b19bd53859 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_162/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46ce907774b120e2e2cb29065429d22bebfa0bc33cd383174474408f7581485 +size 2296755 diff --git a/eval-results/mathqa/5/ckpt_162/results.json.tar.gz b/eval-results/mathqa/5/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da3fb2561c205fec8c8d17922d0d3cbf1b0d6744 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5b056ae916b620a8c1532b4562853cffe7af03a2d4471df01911de484545d0 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_165/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_165/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f413da22aabb0f39cb0aea9a0968f4cf6c44a75c --- /dev/null +++ b/eval-results/mathqa/5/ckpt_165/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f6e2da3125cd2bd1944c0f4515ba7d6c4983178514914b6e6f04c3c178cc79 +size 2296695 diff --git a/eval-results/mathqa/5/ckpt_165/results.json.tar.gz b/eval-results/mathqa/5/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b9c6ab1e654dfbf57894c1d10e76ee45811bfb1 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:133dadf95cfaad6aa176c39b2086546761fb46f9ff8c21fe40e810c17e4a4fb9 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_168/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_168/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29fd7f13759786afad6e4acc672229df51ec450e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_168/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69140567d245760ef976309d9dd827bdcb3dc0bcbd5accef1373b3bc9cdf9af +size 2296450 diff --git a/eval-results/mathqa/5/ckpt_168/results.json.tar.gz b/eval-results/mathqa/5/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a632857080f95cb576c0b00d390f2e23205b2b85 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a06d0ea7f7b1afda61f8bcbc16854acf039e38369dca2c3f10f5f73a05758d2 +size 2806 diff --git a/eval-results/mathqa/5/ckpt_171/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_171/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac0f6b64bab45470046b6797421fc531abab5676 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_171/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d326556106f4b3c61aa0593de0fe07aa92d4520e060cea383652e0fbe6b80c43 +size 2296617 diff --git a/eval-results/mathqa/5/ckpt_171/results.json.tar.gz b/eval-results/mathqa/5/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7a6ee21272de42ec08b60a99998f0c980dec9a6 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea27d9aedc4a39cdd957141e7f1e2a498f61a2efb3ea11c2202258e282af76f +size 2816 diff --git a/eval-results/mathqa/5/ckpt_174/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_174/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7abd2a1ead07034ec12970f6de949f1ba0749221 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_174/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae502ed0cf727e134cb0cd03e64ad2b6cf60b4bfd84f4157d70cf16c28172608 +size 2296517 diff --git a/eval-results/mathqa/5/ckpt_174/results.json.tar.gz b/eval-results/mathqa/5/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffed90e311dfad652bc46eba138812bc33e588d7 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00de98303c7a06ffe2ef31aa599b1ced82013c858565ec7afa6d7a7f13ac872b +size 2813 diff --git a/eval-results/mathqa/5/ckpt_177/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_177/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a67a4c9af5aaebbfab1779fa3909f1de4514b089 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_177/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e11b2f57a7ecabfc482e434874a69815e264b5eab85010d7d73db7659b155c66 +size 2297035 diff --git a/eval-results/mathqa/5/ckpt_177/results.json.tar.gz b/eval-results/mathqa/5/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f43bd12e9bbe8c2bdfea75e376f3c8e9ec68697 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d1674ae722e796d8f7dfd3524418708af18144e75d6dd858bb768efa1898bb +size 2816 diff --git a/eval-results/mathqa/5/ckpt_180/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_180/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03a0c30a9d331be38082a842c39e6ea4e2d360e1 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_180/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a19ebfdad2e10d2182b8c7c8e08eec409f538a803d4aefc1a86e9d1a168512 +size 2296596 diff --git a/eval-results/mathqa/5/ckpt_180/results.json.tar.gz b/eval-results/mathqa/5/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..785d92409b7b1713c72b64c33139c04aed38b513 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a47a3fe4889bf1702903445e45995f273fede615197fb17af40efbfdfd248d +size 2814 diff --git a/eval-results/mathqa/5/ckpt_183/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_183/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df424084efa6799698305bed41d1c0d3ae1b499a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_183/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baec272e3ff2e6bd60054fdaf2ac7b88dcff8624a6eb2f9ddc85ee3412fb9b77 +size 2296999 diff --git a/eval-results/mathqa/5/ckpt_183/results.json.tar.gz b/eval-results/mathqa/5/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb451789ea5c7736d7aafa5739ede935b59e28a5 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a1c1a898606b19975c226beb2cee852164cc7daef1ac20ce053a0d03020c74 +size 2817 diff --git a/eval-results/mathqa/5/ckpt_186/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_186/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..371f3315cc697d8f83312e8e13a2827b7fbb9e0a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_186/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33db65a2287b3b0054ab467a573ea159caf99e0e50660c7522ebaea917ccab6c +size 2296649 diff --git a/eval-results/mathqa/5/ckpt_186/results.json.tar.gz b/eval-results/mathqa/5/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00c3d3e6b45598a4a893aeda8430bec7d2d9554a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10cf5438b612c43518bae75a3b1d564ab243bbc3dbdbe0eb40feeff52d8d9de7 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_189/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_189/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5308f12e8d934a1986a4a1dcd08d28e8e8498e0c --- /dev/null +++ b/eval-results/mathqa/5/ckpt_189/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2661ae83b524c15b7e8a2297b93e5f73e90a3e2239fc7dca381c9b6ceacb2f05 +size 2296868 diff --git a/eval-results/mathqa/5/ckpt_189/results.json.tar.gz b/eval-results/mathqa/5/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5c72b7dd2ae1566062bfd82923b4ed3fd84be23 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770274315ecf73445dd140a2644b76eed11579589c15c54532a3f886eb01ed17 +size 2812 diff --git a/eval-results/mathqa/5/ckpt_192/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_192/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1f4a23d08696e6319badcbfecaa3aafbe1e61dd --- /dev/null +++ b/eval-results/mathqa/5/ckpt_192/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa8d7dde444ef8c15072e5391fd2afad6bce150bf182240f9dc02390e127bae +size 2297061 diff --git a/eval-results/mathqa/5/ckpt_192/results.json.tar.gz b/eval-results/mathqa/5/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fee39ab87dea23cd4e8de0ee6396d40da8ef54b3 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd039ebe5bfe9971a60fbdd151d67fa67571a0ccf29e812e6d526737669f91c4 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_195/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_195/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8af788776c41436534be1eddcbce2571a8396bfa --- /dev/null +++ b/eval-results/mathqa/5/ckpt_195/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52c3f28bb64ee71d21c49a4396155d97c7beadceca7f5813bb827a3680146ac +size 2296826 diff --git a/eval-results/mathqa/5/ckpt_195/results.json.tar.gz b/eval-results/mathqa/5/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47a276af94e13be038ee041205ac3b94a12a350b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:113f11cf86d4c9e64f6c1edc26d0626f434084a30a19a74004b6bc2e50565677 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_198/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_198/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eaa5bf7b3e27a23c93fd21a3ade531f163aa234f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_198/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d01c64214fc2107450c7cc8f195024815ba1883a944d253612b335f9628bd0 +size 2297219 diff --git a/eval-results/mathqa/5/ckpt_198/results.json.tar.gz b/eval-results/mathqa/5/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9278908a676aa2f26e7863bd7907221b428996a9 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef473dbcad647a66230898dc4e3232e3fc0db6f307b0acbf052a5bbc7a61e229 +size 2812 diff --git a/eval-results/mathqa/5/ckpt_201/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_201/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a125687e40f460d4637b218b51b738ba09066552 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_201/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb79dbfde7d80aca997bc55e4d1526785a72062df448cdd2ab8ae133f1be5bf1 +size 2297281 diff --git a/eval-results/mathqa/5/ckpt_201/results.json.tar.gz b/eval-results/mathqa/5/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..554c0358f554a2bee0e531afd264e77818409892 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de8f26b3198417dd6ae0f1f3948ecdcd09fc4d2a48793f13238c83c78a5789c +size 2816 diff --git a/eval-results/mathqa/5/ckpt_204/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_204/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbaa8b260332680ca76e04b0fac34c4b082d6db1 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_204/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb3a1b9fdedc796c7382cdf87b584b2c32b76f20d6579d05a593993b2d63d52 +size 2297158 diff --git a/eval-results/mathqa/5/ckpt_204/results.json.tar.gz b/eval-results/mathqa/5/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a81e22bc9c00f57ea705cc03e5c9bea8c3d95843 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215bf0c1b73a8716e8cededda05b84e8c57fd5743a5139782e0060fdd5fec77e +size 2810 diff --git a/eval-results/mathqa/5/ckpt_207/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_207/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f4de4f1a2cfbf3345fe472105d311d229ad4902 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_207/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63e79eb3636dd67246b343ebcf744b32ffcfbc95dd37f13550f23a691f20956 +size 2296705 diff --git a/eval-results/mathqa/5/ckpt_207/results.json.tar.gz b/eval-results/mathqa/5/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c12b812d6f52284cdd0d835bb757b882b42dad3f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7480616c6d400490e5ce59e316c49ab5dd6d655972dbe74a4aab97d509f7f3 +size 2814 diff --git a/eval-results/mathqa/5/ckpt_210/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_210/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5bee8a7e1689b6bbd3c29a090da4aa5a3bbac3f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_210/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b15f42380bcf7be686756334ddcd02f5fc6e18f402294cf51a10a8fc77f998a +size 2296714 diff --git a/eval-results/mathqa/5/ckpt_210/results.json.tar.gz b/eval-results/mathqa/5/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e63fcac0ac05d6e40245872e02d8c15f268497e4 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e59acce92b0e381b31f454fd1af397a45ee4bd1631a7310a384fc07e887a153 +size 2812 diff --git a/eval-results/mathqa/5/ckpt_213/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_213/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b76ac23a3a2c4a956e148b19c5d9995e9ebcbd3 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_213/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5300f804b7c83f73827036bb4da33c8bdb4adbd1b99578f9a4e642af424346 +size 2297016 diff --git a/eval-results/mathqa/5/ckpt_213/results.json.tar.gz b/eval-results/mathqa/5/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..533e64651a314b3435deabfad5d7f8beccbfc505 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f572e2ce37a766880de2ee585ccfdff72723cc74a7c49614ce30a3bf633321 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_216/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_216/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e998578f5bd0927aee7ebf2a449992795d5d36f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_216/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02dfdd01ff2edd0557c794405705fda3cf1b8d389f0681447f1a059c74f368ed +size 2296773 diff --git a/eval-results/mathqa/5/ckpt_216/results.json.tar.gz b/eval-results/mathqa/5/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00abd10411da87e08629c8c4ec64a6e0aa0aa587 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa035c16663cd8f8a76bd343969c695ce178a6c07b0e40fc8f082ceea6961418 +size 2812 diff --git a/eval-results/mathqa/5/ckpt_219/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_219/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b49bfb61becd9bfad7527e4cf011aa8c2492f2f2 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_219/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31ab6099746d34f87b32ae5ab55c18bdb127ce04772e4aa053676ddf65b05d5a +size 2296953 diff --git a/eval-results/mathqa/5/ckpt_219/results.json.tar.gz b/eval-results/mathqa/5/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0db9db1299c0bc12b5a83550b35922418bdb2c1 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f4d11098764c6e1d98c1074709bb28ca248ff00e5eee9fcd3c8b7ee099208a +size 2814 diff --git a/eval-results/mathqa/5/ckpt_222/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_222/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1b1c68a25c4e81add7cf2fcc3064f37ed42e0f0 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_222/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fdecbd3bf93c4565244b91501b9aa02054b517fbb6a794ff5502420bf8c1b0b +size 2296979 diff --git a/eval-results/mathqa/5/ckpt_222/results.json.tar.gz b/eval-results/mathqa/5/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..389196cda21a5e65f685e1a26aee28f86462e307 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78990e10b2dada7efc3f1538665e7f162789ea823255795b370b043a61c01f32 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_225/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_225/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38fcda60da2b9d7ce177d1df7a1812953da39345 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_225/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edddc8efd1bfc4423a756b37410fd90550c237ef7bc0502506b250e557a5aa0c +size 2297086 diff --git a/eval-results/mathqa/5/ckpt_225/results.json.tar.gz b/eval-results/mathqa/5/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc156036b771296b32ab7752d8da518ff7f58612 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37701c9fea794603b6f9848e927264097abc41d3d5586ea8c1d71d5dbf35912d +size 2814 diff --git a/eval-results/mathqa/5/ckpt_228/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_228/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d500390c62a4a14a5e77d9f165c1449fbd391bd9 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_228/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c49b8e9c5e39553cad4d62ca267bc961d21ae2f773b330a404049c1126a1ab +size 2297394 diff --git a/eval-results/mathqa/5/ckpt_228/results.json.tar.gz b/eval-results/mathqa/5/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36f38c682f9fe1a23348421995680b667aef0b2a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f23791ce04afe26882ac2b2d5bfba9a2eabd3f5cc005b1ecfc99f77c843ed950 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_231/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_231/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..071c88b4d7e2c04e84ae631569d4ceb480f474cf --- /dev/null +++ b/eval-results/mathqa/5/ckpt_231/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ddcdff71ebfab89e0a20e25d970cf4df134b585a4f18f40dff2dec2f2c08df +size 2297637 diff --git a/eval-results/mathqa/5/ckpt_231/results.json.tar.gz b/eval-results/mathqa/5/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72f803c2ec095c9bb297b35e7e319faa2aa215cf --- /dev/null +++ b/eval-results/mathqa/5/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2817d83f6c58de0a38860b6a7ef06882948f5f525da726deb2299fa8f4548cb2 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_234/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_234/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdef448db53cdefdc30aff294af05cf71c83c81a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_234/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e1e15d588e95829932793343ddf7700dcd3a83641e97a0f2f1bc1255e1c5e67 +size 2297618 diff --git a/eval-results/mathqa/5/ckpt_234/results.json.tar.gz b/eval-results/mathqa/5/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a978a98466de0a14a09c34dcc6474f1d74f4fb0a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f9cdb4f8a265849a3614e7b747392cfc7832fdd3774ab2ec392ba8381fa0c1 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_237/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_237/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..255a3b039e907185227c4c50046cf914a6a11add --- /dev/null +++ b/eval-results/mathqa/5/ckpt_237/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec55eebf8aacc9e63a54fcad371579184e5a331d8a09dc100db162e0491eb01 +size 2297287 diff --git a/eval-results/mathqa/5/ckpt_237/results.json.tar.gz b/eval-results/mathqa/5/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..778be9807501671fd06c14e40b3640863c95e230 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20eaed04441fa1fc0c0d8ce56cebba33d5c3d75f70429fbd40149379423e6a62 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_240/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_240/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..376e503022a30f2c2ad31a31e14135ede00447a0 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_240/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d7fd14731f2ef6cf61d1e85b310e2384d2fde048d7e0cec1650eb2d885803d9 +size 2297284 diff --git a/eval-results/mathqa/5/ckpt_240/results.json.tar.gz b/eval-results/mathqa/5/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bb314a0296301c0b6ce3d192fe4f94bafd9c184 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d576628ca2d5f0fa357af644a4a36d451f12922832ac4a4b28b60b84538d9c0f +size 2814 diff --git a/eval-results/mathqa/5/ckpt_243/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_243/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8b0803fefb78c0ae901d347005869b140ed5214 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_243/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c00938a9bfc61e4994fec24906d22469332884cccf3c1ce1b9af5d6b5c191bf +size 2297241 diff --git a/eval-results/mathqa/5/ckpt_243/results.json.tar.gz b/eval-results/mathqa/5/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6be28587172920e56bd1ec7784f7879d45b365b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8441d82c10f83ca8371afa11c977ddab3318e742a79a8a3c549a6a59098c2789 +size 2814 diff --git a/eval-results/mathqa/5/ckpt_246/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_246/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61fddac56257b01ff29e5c1db889e65242f31668 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_246/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d46293841967e8026a22ae048a9b6c240d7843379b88fd9e30b86721183d01 +size 2296961 diff --git a/eval-results/mathqa/5/ckpt_246/results.json.tar.gz b/eval-results/mathqa/5/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b938536f493a03b2e405687d91e8a98a87cfb8c --- /dev/null +++ b/eval-results/mathqa/5/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd33fac95500e720476d39906b65583f0c7dab2a5e3420e4b32ce79dd4b96dd +size 2815 diff --git a/eval-results/mathqa/5/ckpt_249/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_249/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9085919cc4fe1dbe08327910d655965ad61285cc --- /dev/null +++ b/eval-results/mathqa/5/ckpt_249/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27c55ab6530bb50071594b22c1a6a58a26ccd64accfb49506572fdc33aa2cbc +size 2297128 diff --git a/eval-results/mathqa/5/ckpt_249/results.json.tar.gz b/eval-results/mathqa/5/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2539ba0dd5405e3bada9e1d53c1887d42b82c072 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa984675ed1e55cbbb8a87c73a69a28f6b47228e7c7f7ce44005feef3ba46242 +size 2818 diff --git a/eval-results/mathqa/5/ckpt_252/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_252/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..626fdc7565b8697a38a89cfa0a1d59c9722e8e8e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_252/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102a21ac6cecdb85c7b14fb4f06aad145b36648174ea1a98c05a1abe042dd022 +size 2297257 diff --git a/eval-results/mathqa/5/ckpt_252/results.json.tar.gz b/eval-results/mathqa/5/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f61755b4a3204df9ec771b7ecbcbb4201691de1 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1bf1b80bdd53f334658dc0e122e9619ea04ac9b5ddba9ce1d9ebdcdd9733ffb +size 2814 diff --git a/eval-results/mathqa/5/ckpt_255/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_255/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2407fd6c3e7803355070d3caf02f20ebedbe34cd --- /dev/null +++ b/eval-results/mathqa/5/ckpt_255/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9ac9a2d66bff6dfcdd53d9e4b5a739939d3c15feffedef4f479d075cb9defb +size 2297140 diff --git a/eval-results/mathqa/5/ckpt_255/results.json.tar.gz b/eval-results/mathqa/5/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60706b4062443d2347a5b25285adbbfd14267bf6 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e0682b7cd5c7137bb373377891401c1ddb93707e09a7ba29a0372d6ed07b29c +size 2816 diff --git a/eval-results/mathqa/5/ckpt_258/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_258/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df8664d63a771f249224711765c5295bd8845c29 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_258/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eeb8a689214f3c6a63f298d42adae8ac21191a5f58e77e2997225d50924d19b +size 2297500 diff --git a/eval-results/mathqa/5/ckpt_258/results.json.tar.gz b/eval-results/mathqa/5/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12b82ffca4b6bc3fdc0c61d99b0955578bf3f334 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52361ec3da9cc2f43baadaeceac828aec874ef2cc8bf7fb38004ffba5ef01aad +size 2815 diff --git a/eval-results/mathqa/5/ckpt_261/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_261/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5222d9953f32c4fe326075ca95d48fa64c43c96 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_261/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c59ba7708f6daa164c59bf377509ccb44a26d7fb3b0bcccabb32c973b1b07b1 +size 2297266 diff --git a/eval-results/mathqa/5/ckpt_261/results.json.tar.gz b/eval-results/mathqa/5/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfe07e54a87c647261c550d904d0385e306e0e1f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:744429ad9f897034d3aee09bda593d93d316e66445b44112e0b9ebb7ee5c190a +size 2814 diff --git a/eval-results/mathqa/5/ckpt_264/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_264/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ff920268a5474473a660ce7fa54aed6f8fae1f7 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_264/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d317eb5998fc5f94d4adf2bb649fa7da185fdfff9dd5ed64a64e79bf490d303a +size 2297134 diff --git a/eval-results/mathqa/5/ckpt_264/results.json.tar.gz b/eval-results/mathqa/5/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0962b064d7243263a5ae3358d21b11fcc7c72fc --- /dev/null +++ b/eval-results/mathqa/5/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2273092585b31103fa382a9effae6502b5f5df236fcb779f6894987b57820754 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_267/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_267/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f8cd71d1f8aec17e2e7d14f8edddc2d07c5617e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_267/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93f5b070d033fb30e22472623c00bfa0a2dec41b2513e2516c0779f634483f9 +size 2297368 diff --git a/eval-results/mathqa/5/ckpt_267/results.json.tar.gz b/eval-results/mathqa/5/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e2855dd63bb6bcbbada810bb0ee93b2bed17ea0 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2787504bf8412050e8d75e10b5d97f616f4673bb3d767274e34cd4bed829649e +size 2815 diff --git a/eval-results/mathqa/5/ckpt_270/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_270/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f9df83e7ccf850d7d754cc077269edd11f0d0da --- /dev/null +++ b/eval-results/mathqa/5/ckpt_270/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3283d48792ceae86d18173927e5513f4342eeeb77ed98f16fd7a45e74da18e9 +size 2297721 diff --git a/eval-results/mathqa/5/ckpt_270/results.json.tar.gz b/eval-results/mathqa/5/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ea37968cceebaf931723677eb98244f5d208313 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a61c2dc5c710a670e1ffcf2280d0ddf63f88f7a3e9eae4d6b69b9eb76d4d6895 +size 2815 diff --git a/eval-results/mathqa/5/ckpt_273/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_273/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba976094d4c7cf989989191c43901b16aefcfafd --- /dev/null +++ b/eval-results/mathqa/5/ckpt_273/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8247dc68c3985c1dcc808ddcc569781cdb12bc78b5d7a33fec5629196d9b6fcd +size 2297460 diff --git a/eval-results/mathqa/5/ckpt_273/results.json.tar.gz b/eval-results/mathqa/5/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae564213f658782265aefc2dbea61fcebb0b836a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d20f5a236fe17671f6c5f270b6a0a9c9b3e2e4c538da99c8f86dc428790245 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_276/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_276/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d13be907d0e88e183dc248526e99971bd449088 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_276/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:430658d54f2d9bbecbd237d8430f13cdd996a9d3b9aea08970680eb245038e99 +size 2297368 diff --git a/eval-results/mathqa/5/ckpt_276/results.json.tar.gz b/eval-results/mathqa/5/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79eca593f75996813e93a26dbc4a929ca94205a8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60a5fe1bd10ba1c66c6e543b726255dc569d06c5e44fb57d19ac19c76d9b250c +size 2813 diff --git a/eval-results/mathqa/5/ckpt_279/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_279/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..623a1e648d04b51db7b285b9058adef419c8e26c --- /dev/null +++ b/eval-results/mathqa/5/ckpt_279/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e5f051a613ffa4bf409348563e6275ee05ee34c9baf91294cd0da646654b4f +size 2297561 diff --git a/eval-results/mathqa/5/ckpt_279/results.json.tar.gz b/eval-results/mathqa/5/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d12227b220463e544b59dcc9c34d6269a49f17d8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27556961cf33bca6635ffee63f20cdf2cbdb912fa57e6f174c4837f4a17d3d8f +size 2809 diff --git a/eval-results/mathqa/5/ckpt_282/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_282/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c08b50597908e3d12f723fa91603db32206bb4f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_282/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c69375e08250ee61c659e47c27b080e64de04985b1ada87fc17ad0fe887b0801 +size 2297241 diff --git a/eval-results/mathqa/5/ckpt_282/results.json.tar.gz b/eval-results/mathqa/5/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8875e23707f181d1eb2e5ea54f317e6f497db8f9 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1be7dee7d9a16b70c95d82ed1c8e9f1db2ef22f5bb1187932cb53e3e20cea7f +size 2817 diff --git a/eval-results/mathqa/5/ckpt_285/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_285/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcfab34d62333e8f8ba26a6b547b78e75bfb311e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_285/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b996dbadebd3efb54f2dbb66eeb234281a4c6d04f4179023814c4544ec25f66 +size 2297236 diff --git a/eval-results/mathqa/5/ckpt_285/results.json.tar.gz b/eval-results/mathqa/5/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80df0a68a236ec29accbed1f76c7efece2f4aedd --- /dev/null +++ b/eval-results/mathqa/5/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf45bf47ba905c9bde750f5abc055cb91cde83eef6d75105e8d44a10cec0dead +size 2816 diff --git a/eval-results/mathqa/5/ckpt_288/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_288/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..695dfaf9d110687c193f36e31994966ca3554e98 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_288/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:054c011f7b8fa73790733ecdbeca11697d50109ee55fc740b974ad8402f692b6 +size 2297526 diff --git a/eval-results/mathqa/5/ckpt_288/results.json.tar.gz b/eval-results/mathqa/5/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04bc5501447220a2898ef438affab5625b3c2713 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6fdbd35957c3e82ae0f801672051ec02fdf30090df42abf4745dc80c98e223 +size 2814 diff --git a/eval-results/mathqa/5/ckpt_291/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_291/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..355c14d61ec9ebae53f6ffc6171331a042aa002e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_291/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb4f916304936f6cf08e5b4dd5d576a022d40594208f44db4b8cf20f7c04c5e +size 2297413 diff --git a/eval-results/mathqa/5/ckpt_291/results.json.tar.gz b/eval-results/mathqa/5/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8073e9c5f8146598fc0a62ecc6c4d64b3171ed2e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1e40ab75ff3b05971d8d28250ffcd67d369a3b870f0fd33561f679fa072c07 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_294/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_294/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6452869278199ab690831273cd5c07e1972bff0a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_294/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb59be8277d99e4ae044b1746742edbc03dd3926c3d2582bda5b2658fe47cfb2 +size 2297444 diff --git a/eval-results/mathqa/5/ckpt_294/results.json.tar.gz b/eval-results/mathqa/5/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7545f0f86088c34a47dc5e5c8f09b4fad005ade --- /dev/null +++ b/eval-results/mathqa/5/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f96ae983d9848ee72fb2b565bda05a4b65140425ba99154af6fcbee23ffa5bae +size 2817 diff --git a/eval-results/mathqa/5/ckpt_297/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_297/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7e91fd99e0712bd6fa482e6bf978e1f1bf08cc2 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_297/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a20648e7f4624e0e9635c0c83a7fc850d011cd408c7b0756f1ab1205c293db +size 2297289 diff --git a/eval-results/mathqa/5/ckpt_297/results.json.tar.gz b/eval-results/mathqa/5/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..349b8587f14ec13318954badf71b2aeda3e3daa1 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4050fc04775f72b357be21560c8cdb6ee3e357a5c31ea264e90c664530d4e22 +size 2812 diff --git a/eval-results/mathqa/5/ckpt_300/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_300/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cfd697f6586bbd57e8c367eb9e9e67b8178f709 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_300/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f13c3914ac923c2568577b66ddbe3f0f1094017f22b2a7328084544076bb4e2 +size 2297617 diff --git a/eval-results/mathqa/5/ckpt_300/results.json.tar.gz b/eval-results/mathqa/5/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c1bd1aeb26e46a7eb467e45ba729817cafae23f --- /dev/null +++ b/eval-results/mathqa/5/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6eb74432746cf61cd6798e14e6fee2612d8173106ea6e0962af047a28cc9e6 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_303/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_303/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a24e4ae4e0f2d59373c6ef6814400fe34ff6eac --- /dev/null +++ b/eval-results/mathqa/5/ckpt_303/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c59ffeaf7e78f062f2e92e33b4cccd810502622bed4b0d1b857bff45802d6db +size 2297659 diff --git a/eval-results/mathqa/5/ckpt_303/results.json.tar.gz b/eval-results/mathqa/5/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b52df7530a578fec3c4d1d40a49c929ae43126de --- /dev/null +++ b/eval-results/mathqa/5/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1a7cf04466d8ad1a8d1f8f55789353256a4d3aaca02a2aa9b40a56542b01aa +size 2812 diff --git a/eval-results/mathqa/5/ckpt_306/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_306/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a75d4ce743242ae19275e3d20cacb599a5390a5b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_306/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ad8e609afec0960ba656e555b0f38314dd1f4eb75b5988486d5c21be4c8db4 +size 2297395 diff --git a/eval-results/mathqa/5/ckpt_306/results.json.tar.gz b/eval-results/mathqa/5/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc85dbe93db538dd6622dfe0fed19eebd35b1825 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b75620efc43a34b4a9f812d7e00d15dedc12fbafd7014491758944b703fdfd +size 2813 diff --git a/eval-results/mathqa/5/ckpt_309/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_309/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8191e42245b23f547d3c2c38c8216437e5caccc --- /dev/null +++ b/eval-results/mathqa/5/ckpt_309/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e696d6688e64c8d720f9f7bbe90fc240b0be11a364a14a73cce3ff94e89fca +size 2297788 diff --git a/eval-results/mathqa/5/ckpt_309/results.json.tar.gz b/eval-results/mathqa/5/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..143ea5cc9e0f567caa8b3160122f9f630b5a2ac0 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e390ab7133abf29dcd58dc5b1e922034b91b4a15db0f75e661c659a80ab14f87 +size 2809 diff --git a/eval-results/mathqa/5/ckpt_312/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_312/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf8ac5cc99209aa2883be1205068eb2be73d5b3e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_312/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1b5a89bd34082025fe8a65f387105cec1a5165248f093bd044ac7a12ce845b +size 2297700 diff --git a/eval-results/mathqa/5/ckpt_312/results.json.tar.gz b/eval-results/mathqa/5/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15576302fd3b7ce0214a82dfc1908323805cef71 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5897270f06677d3272ba1333c3541e5e8e32626214b642e7024b12628cc7053a +size 2817 diff --git a/eval-results/mathqa/5/ckpt_315/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_315/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54ce74a33f92cf6bf54565db0fc895b2f3b0c048 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_315/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917df7fd30bcd96d8fe372ef9ecd3acad96b13aa8223e6b468901fc406b0cca4 +size 2297588 diff --git a/eval-results/mathqa/5/ckpt_315/results.json.tar.gz b/eval-results/mathqa/5/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ffde1c7505817f6604261b2c539ea509711ebac --- /dev/null +++ b/eval-results/mathqa/5/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:029dce3162a8ddad7c7bb7790bb52ae94910dffe865c98e10165b6f9ccadb7bf +size 2817 diff --git a/eval-results/mathqa/5/ckpt_318/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_318/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9b0364ced1f6f7dbb8331efb69a258809f83a77 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_318/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d145cbfaba4369a373869872f83414e2c0988396963e3ed5e7c48c53806ba52d +size 2297653 diff --git a/eval-results/mathqa/5/ckpt_318/results.json.tar.gz b/eval-results/mathqa/5/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7627334157307da7d234c773127752f0d3d84689 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91d8b6174cd325c22633b57b73e4a571220c30dcf5a4bb53caac21764b4b1a24 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_321/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_321/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbfd09ccf9aa6a99b6dc5182a596696f4bb4a473 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_321/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:304a6910bee851b4dc2bc9e0805de8e299f17bb2bf6fbc26b271005eb0e5505d +size 2297609 diff --git a/eval-results/mathqa/5/ckpt_321/results.json.tar.gz b/eval-results/mathqa/5/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d78b23d8e8f33989fb661cae8ec47f05d4811f7a --- /dev/null +++ b/eval-results/mathqa/5/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3769692d4b53ebfe7312900d4a3ff076dd03c86b865b03d125f9eb4c37efd44a +size 2816 diff --git a/eval-results/mathqa/5/ckpt_324/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_324/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e864e05107798be0307711369a0c1e89536f7e12 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_324/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600b054c672861062256ccd76f409ca9c9382c5c8229564f36eeff9e023ac390 +size 2297501 diff --git a/eval-results/mathqa/5/ckpt_324/results.json.tar.gz b/eval-results/mathqa/5/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28425c598b7d319790fc92a7effc7eddc74e4961 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3cf05134dfd98b0552916e4c704ff31e1d16ba8f215d260b3d54ac7131201d +size 2818 diff --git a/eval-results/mathqa/5/ckpt_327/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_327/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9b7968e27769b2097380f94147116499d90b8a0 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_327/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d90b043a6d2f1be80729998ed154e7b21ad69287e13274d4ded0770c254f93 +size 2297831 diff --git a/eval-results/mathqa/5/ckpt_327/results.json.tar.gz b/eval-results/mathqa/5/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb4d892168a03b8e0b8e29ec0e984ebebef0f5b8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30938e95015c890eb8ae975672a190791ec81fbfd337fe9572b642cbb7ba6c5 +size 2811 diff --git a/eval-results/mathqa/5/ckpt_330/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_330/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..229b1afb28d57bf30879f96f6b6ffbc8d0422843 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_330/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8557329d350ca8e66c77c82fa543cdab4953b922387860c8672d8630ae26e671 +size 2297642 diff --git a/eval-results/mathqa/5/ckpt_330/results.json.tar.gz b/eval-results/mathqa/5/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..103f260ac227772bae5ccca97f97ba9e0ec5d004 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c781f7d22cedbb03d21f23560be88fb262d77974dc93d07af3ed62d29fe8853f +size 2812 diff --git a/eval-results/mathqa/5/ckpt_333/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_333/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d17148f4ea98e02868e0e1df685ce790d0f7f2c --- /dev/null +++ b/eval-results/mathqa/5/ckpt_333/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:025f0144f7089a55e5326737c90586bffcda733574e2201f68e28a9346825ff4 +size 2297673 diff --git a/eval-results/mathqa/5/ckpt_333/results.json.tar.gz b/eval-results/mathqa/5/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d36a43bdfdb5b2e59bc9d0e1f3069fa9d2a98f7 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd401324335e8085b5fa5c00b31d0db081ea8bd6c90ad83c4826319a721da887 +size 2814 diff --git a/eval-results/mathqa/5/ckpt_336/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_336/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e78fa77544e0ed6c5fe680584322ce76bbbedafa --- /dev/null +++ b/eval-results/mathqa/5/ckpt_336/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7653daa935412e8fd7da2ed18e3f4c6faad2ed2c2bf44b79e09d29ace8b423 +size 2297471 diff --git a/eval-results/mathqa/5/ckpt_336/results.json.tar.gz b/eval-results/mathqa/5/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c76d75f7591b192fe1f085080dde6563ffaaae72 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10c0ca077214b7fc7ea3fc913853a079c2c287080e6b514158d41ae4d9f8c24 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_339/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_339/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b62b23222fc66b9c92eee44c54cb2dc1dc1bfbc --- /dev/null +++ b/eval-results/mathqa/5/ckpt_339/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e7ff080ad389259334420216e7102b83ce8f86408042caa970fe5df88f9734 +size 2297728 diff --git a/eval-results/mathqa/5/ckpt_339/results.json.tar.gz b/eval-results/mathqa/5/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ead921990c33072911734ab7d0b1e1e61779e81b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d77f79d847236032f1544a736bdec27ae1ab76e1a3a895b32e0c2f2e7bdf0558 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_342/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_342/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b19121b5bc723ada47a8f9366ab67a8118656c00 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_342/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0061e84be4d12c9c60e6337b2bdf58c3f1f860a6588c0fc3b20288d9d0f877d +size 2297721 diff --git a/eval-results/mathqa/5/ckpt_342/results.json.tar.gz b/eval-results/mathqa/5/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc97039bb00b9b8dcab3276e4218d9bea173c0af --- /dev/null +++ b/eval-results/mathqa/5/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4654d64c1e1f96484adb812c614637818a91e3740fc467b8078031b57b2120ff +size 2813 diff --git a/eval-results/mathqa/5/ckpt_345/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_345/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..239cba490938fe14f78e05ecc828d4af531f129b --- /dev/null +++ b/eval-results/mathqa/5/ckpt_345/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c2ea9e25322d1ac18a2d646df707bd1bacdc8fd22e477c14bae4f293c8197f +size 2297641 diff --git a/eval-results/mathqa/5/ckpt_345/results.json.tar.gz b/eval-results/mathqa/5/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cff6bba3f8fd96c7c75ea7da465eb72f35a3680e --- /dev/null +++ b/eval-results/mathqa/5/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bd2368b0c7b55b2aa2d6428a874e706fa447ba18f88581e82ec767e17c2c52c +size 2816 diff --git a/eval-results/mathqa/5/ckpt_348/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_348/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5794ee0e8dccb6ff4a17c86fe1914faa89ce7e0 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_348/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c6668a2283beb13c20b6b4737d0e6851ed9c11ed22d6e2ef78fc62c1ce585a +size 2297706 diff --git a/eval-results/mathqa/5/ckpt_348/results.json.tar.gz b/eval-results/mathqa/5/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18bf0bbb85797b802e2208009e097dfc87790616 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9e2bcd17309a1a81f8b54f6a7de3b17b70b6942aa9bd937460893e01516a9de +size 2814 diff --git a/eval-results/mathqa/5/ckpt_351/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_351/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11e154d7a7cba273f872b5e0f493521dff64bb85 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_351/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4066a5f996dafacab07cc9035703deb3276c048b79a3bb24368a73c064153835 +size 2297431 diff --git a/eval-results/mathqa/5/ckpt_351/results.json.tar.gz b/eval-results/mathqa/5/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76401897b8227e4cc9674980d40ac67f83311f33 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5503f5daf0806d620b4c3073b0972b6a6d1a2e97b8be352b0dc6e841e915f98d +size 2811 diff --git a/eval-results/mathqa/5/ckpt_354/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_354/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..646ceb7eaf8f5e02e5e38184effcf0e59d35cfff --- /dev/null +++ b/eval-results/mathqa/5/ckpt_354/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9c4060215495f3af42b8bfc1b607a9063c8edbb24d658f3bed5441ccb98f4a +size 2297789 diff --git a/eval-results/mathqa/5/ckpt_354/results.json.tar.gz b/eval-results/mathqa/5/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ed44f072637b5a27526a47f4ec036527e7962ac --- /dev/null +++ b/eval-results/mathqa/5/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627997f828cc36e6568773040344f393ea3e60b9466b86fec2b2974ce1380e88 +size 2816 diff --git a/eval-results/mathqa/5/ckpt_357/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_357/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a4d185ad3b375a256d8de02ed5c71294eae57f1 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_357/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ac769bdaa2c0f6e0c302925ea260077b309b50ebd77e73156f555da41e1299 +size 2297780 diff --git a/eval-results/mathqa/5/ckpt_357/results.json.tar.gz b/eval-results/mathqa/5/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da5a9fd451110e6768770cc57431696739f95bd6 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d882790ef0c47f274a671874535f20465e78fa0d0d03aab4e51360a5ac0413 +size 2813 diff --git a/eval-results/mathqa/5/ckpt_360/mathqa.jsonl.tar.gz b/eval-results/mathqa/5/ckpt_360/mathqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c888de4e34ad32da32add64afb0f3ab300e11b8 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_360/mathqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634a1a029dc021b8c0e3236b5d091d9a2273852a962cd58a3121368538548458 +size 2297591 diff --git a/eval-results/mathqa/5/ckpt_360/results.json.tar.gz b/eval-results/mathqa/5/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e37f4e52edd3fc6b641429caa402b8ef8924249 --- /dev/null +++ b/eval-results/mathqa/5/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6f6e7d1e9690d9d62adef989e3fb500a8b512291cc0db2b31e3ed1b5f50b29 +size 2818 diff --git a/eval-results/medmcqa/5/ckpt_003/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_003/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b9a32b6290c4237dcfc3a51bdf8293ff218eb2d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_003/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f952d137429175844cc2dd7be18697761038adbad6b1ec106d2f5bb49e1d1012 +size 3489639 diff --git a/eval-results/medmcqa/5/ckpt_003/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73bac88b1f0cdc6239880c539d9287475b874b3d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9c26cc34c61648cddc09640dcf77829b0036a364f203da42b3966e7e5af479f +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_006/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_006/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba95d7acaba115a2ee4d7fa9a08c92d51ee0f45d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_006/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc3ac127c72b136c72e7890535a9649ed01dd529a67274b741f3d640921f07e +size 3481349 diff --git a/eval-results/medmcqa/5/ckpt_006/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60d1ec78fd1e74a940e12f0cb621d57e75bd4811 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e1c8c34cd7b4163ae715757e54d172fca5935b5465a121d0ceed6c31f9e9f2 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_009/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_009/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63d83349766ceb3bfc4e7602f86ee2402e2e8b53 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_009/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e24320df2a46300043adb1e3f57750f6209aaf0a44ddb32c14f27ce1d298f3f +size 3480603 diff --git a/eval-results/medmcqa/5/ckpt_009/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c6a4210bfa14d9dc82c42737b047b586a364d2d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9af2d283b3779ac74b34f73640782f31fbea440308407cf16bbd276aee7f18c1 +size 2852 diff --git a/eval-results/medmcqa/5/ckpt_012/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_012/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fa106cee5bb19bfa9d2c1b5afe84530c22a3504 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_012/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612536498a858681c2706ced6fc7601d76c9e3c13bbe1ae17103423c9454b4c9 +size 3479032 diff --git a/eval-results/medmcqa/5/ckpt_012/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f03528373c7e6ce8a43a2bc3f48d0b3dc8c9b377 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51aed8f84bf1f09f6819dd3797dd38cf5859cc6b92a69f4bb31eafe9bc0c0b7d +size 2888 diff --git a/eval-results/medmcqa/5/ckpt_015/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_015/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..288ab3c713dad55b78234579ca5cfb0268a1290c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_015/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fa599006c3914bbad01e8182a96756cd3ae2474b404ec4a471e60dab97edce2 +size 3479138 diff --git a/eval-results/medmcqa/5/ckpt_015/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f11e5de0626799340389b84b2a269eea5ac29b9 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57ddac76ba46775fcd12cf73d9630ec1f7ce170a7bf54826781fe07ee0ab220 +size 2852 diff --git a/eval-results/medmcqa/5/ckpt_018/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_018/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21df2c6a5276fa3ed71af9a4ae621966d0c70bd8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_018/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ae2e3a453690a2e2166a7b159681317a3fef6a91984138196a03312450e86f +size 3478449 diff --git a/eval-results/medmcqa/5/ckpt_018/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c4aafdc2e1676308e7ccdd1856394634b619074 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61c915ec047da1ecdc7e4586d46c6c4c548ca2b115f97e76f13006e9afe959a9 +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_021/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_021/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b253f745a8b0d8fbabffbd0fec88fb62ea9e254 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_021/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9562b83cd390f98979257f2ef702e900e86a1f6df55dbd9a54651e01b6a6bf3 +size 3476338 diff --git a/eval-results/medmcqa/5/ckpt_021/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23504c3394ce9ae5e4666f62334bedb415c73325 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79369192c4cca30ca2e00a48bb1e6769a35f6ef73ff55266f18bb350bae6ce32 +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_024/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_024/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db53ebb0aa446a5040812662293b6812d9ff8b71 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_024/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b73c60f4cffd4e50fa463c46adc2bc0acf007dd3dc826f8ebe23cc4c747dca +size 3475250 diff --git a/eval-results/medmcqa/5/ckpt_024/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e86e8d34ea57479d0d0c0a79bf5086f9d297b63 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204bc81c02880cfb7770b67d0021ea96b3cbfea2a8bd4da711f09cd3963a952c +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_027/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_027/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..137bd4d792c7c802619973a5e9148d0b101e8747 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_027/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c00a0d75e84665dbbf729788935ec072c38421f45e7f3cb2d86820f0b3fc9d13 +size 3477987 diff --git a/eval-results/medmcqa/5/ckpt_027/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c935e75709912f1a824c13782de71a00f38160c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8100fa89d18868dde3b2d726e1f25eaa5c693dc1981afaac16c539e0c47c1496 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_030/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_030/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c14803d1d9ed2f0d34551ab17a41ed5cad85f471 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_030/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ef42f4614b9c06909f5a04276146099359a2288e10a803bca4d4bcfabc1f287 +size 3477053 diff --git a/eval-results/medmcqa/5/ckpt_030/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e48f0388682e8e38f15c27044c21d94eea206d0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d0ea8f968d2641601457d22118ba7a4370ebf2508264835e9e1145e64f82aa +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_033/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_033/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74a033eb2f899f026f37c44b577f2260a8ff4999 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_033/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb3428f7c1d1cc5a9f1f24f106671d15ac6fa7380654509208349eaca2fe968 +size 3477777 diff --git a/eval-results/medmcqa/5/ckpt_033/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6d7c0d8b3ef3a7efe1949f8074c2263d7e3f3f2 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a04d8dba7be4680cc070cebefc34cb8ea22ccf08089c1429a918e716661e8cc +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_036/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_036/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a11f401236bd966ade69aec636a17490e375429 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_036/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52ec578e493feba18529b20efd160f7fa0daf0b8be132151d29f49a12dcf94e6 +size 3479865 diff --git a/eval-results/medmcqa/5/ckpt_036/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..916b01744562fc1d1cfc7251e62668e192376691 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86432c54ebb7b98fed77768a90cd092d79e52530f7161f9fcf0a917ff6beeb0f +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_039/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_039/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5a40b0dbfe981a24703b32624771224404498a7 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_039/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5789ea5eb0dbb74d0b01b1e70657d9032e58ecfde0f9f09d8ee6266e9e1327f +size 3480982 diff --git a/eval-results/medmcqa/5/ckpt_039/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..525ed456678b8643c4e2630703078b45aade029f --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c06a260c599304a29297cfb03736efe7678ddb640756209bcde67326ab743b +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_042/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_042/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cb46b2400fe442332170c8681fda7779c0709ec --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_042/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d18acb33d753894c0f16cf3256fff0676db63b977b22c9767581c1834fe9716 +size 3480665 diff --git a/eval-results/medmcqa/5/ckpt_042/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44a7e10254c4c7662a515e0721cec0866f40a8fb --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aa644fd58873991fed64ebc90fabf982374e207d58a112a7fdc376bb97de61b +size 2860 diff --git a/eval-results/medmcqa/5/ckpt_045/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_045/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc46e0624f757cb38a86128fd7c4a70b8f9c828d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_045/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a50c0c6938076c7ca771f6d80b1584a2be8a9ec43c752a63e5c6b215846f58 +size 3482250 diff --git a/eval-results/medmcqa/5/ckpt_045/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4b6d389e8575e3982f634ea40ecefcee3aa4a44 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:721ecc4f126703dde46cabeadfc04d4b065fbb07465be05b7749d8244788bd4d +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_048/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_048/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58849f0bcfd0fdf325a0003b796cb06505a6d072 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_048/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ac8a3e4bb34da0444fb7138dd3512fae3b2114766d9dc60f53675ccb272070 +size 3482786 diff --git a/eval-results/medmcqa/5/ckpt_048/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6225a9001c4c5dbf8b9ae9c53fa2c50b4ca98073 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5323e4c09d0b185dfa1cbbd4fe3f4c202e1fe574f67ee3266822f272ff9d341e +size 2886 diff --git a/eval-results/medmcqa/5/ckpt_051/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_051/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65256c265c627a5aa2bc08752bf6a2e2714d37ee --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_051/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c360b8d28de4e6665980ba6e247d156fc4389024d6404b8eadf6767969f11c40 +size 3483277 diff --git a/eval-results/medmcqa/5/ckpt_051/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b68aca9faa180dd784d1c9ac4d5cc8750e98fb56 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4cef8724a62cc933835bae70489e81f650a685a3a29e4e06bfefa37a67e8f65 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_054/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_054/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba42e7743bbfa86f074e0a17bbb1fa6c46028f55 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_054/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d9866c5a7ac66e427732d010a440e87366634f944e6f8f5136b2c529dc8db8 +size 3485467 diff --git a/eval-results/medmcqa/5/ckpt_054/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..938425e8eb77c535d1e249aec90d006cf7dfee5a --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c83b29d93a9b52d318d56e59cb1079ace56ce34fdc252e33803f02b0f9420749 +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_057/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_057/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59c1c6d6954b62d2e1034be1099d40580b80b04b --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_057/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4d4d3d8ed9e199a75f314f361dcf8d56c29f6a0f2f23e4a7a47faf787fd7091 +size 3487849 diff --git a/eval-results/medmcqa/5/ckpt_057/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c496ebb0f8388346fe4c3545cd9e85ed70db9992 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3919a90eb0005f06692519f72c77c7bb6a1ba22c333d89be64824328a8577476 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_060/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_060/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de9e1d2e3590ad7742913d1f26ae08323a324275 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_060/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e51d6637dd071ed09109abf6e7b64541f19cb769bf7f5751f682ec845411a86 +size 3485421 diff --git a/eval-results/medmcqa/5/ckpt_060/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fe8ea448cc71fcea9847fa4ba76cdeb8b0eb7e1 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8571cb4d1627d8e7385f2afec84fb807e31b7b9cca3482d0c5273431609e2168 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_063/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_063/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc3ded5267d775d2ce07516b5e4658fb38df25eb --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_063/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f447449aca53882e78eff091038f336e244a6c7620c0bdb766ef6fa60463f708 +size 3485825 diff --git a/eval-results/medmcqa/5/ckpt_063/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46d1ea7e49b8b28cff6dd181397c9266715a6616 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539153e90732421f5c4ec2524fb14fe3b20aefa695117610a8a66d9042e7df74 +size 2884 diff --git a/eval-results/medmcqa/5/ckpt_066/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_066/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..368b71a7e6bab2aee052e2533564dbdd917a9fb0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_066/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b41b39b548abd41da114f2aac2ef5bed621dc37cb330e5f39145f79e71c7331d +size 3489987 diff --git a/eval-results/medmcqa/5/ckpt_066/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d6add548970a0588b3996462c0042ebfcfb1d6e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df74d0ed1d903300d786d473cc6ebfc671a7a4b9b298f05bc3b5ec3ec442abc2 +size 2853 diff --git a/eval-results/medmcqa/5/ckpt_069/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_069/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e2673e906b5b30e26885e81e483b2d9d4052303 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_069/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d14ab8c9f6f08744743ef401b674d45f8b0b3b8e7f63a5926ab94467c03362 +size 3488410 diff --git a/eval-results/medmcqa/5/ckpt_069/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9aa66d988c3fdd4ebb0766d72c1680d28822a1e8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c37c426a9b05945e040abf99c84c0da63264999948d1d03d9c60dee9dff8c7c +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_072/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_072/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24202b61dae6ebfbc98fda6cdb53591eeaab5d66 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_072/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c070172cd95acc6f483b1fee76782a54f6d97af0da3ef637b41e42be1c487f0 +size 3486622 diff --git a/eval-results/medmcqa/5/ckpt_072/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac0e58bda7737dbab10d653a7b4eee43d9d68f49 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b8e225de0a62f4481f19710e264ba3b24e88c64dbfdb77bb04e8ec328ea5e0b +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_075/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_075/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3798217e95827e8cd8155cf8f55bf3d0fbf0eea5 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_075/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa3ee3020b7a0975e942a18ced43be0fabca7768b9eb75bfb0765d5e13ab4f4 +size 3487791 diff --git a/eval-results/medmcqa/5/ckpt_075/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f57feff472ea83b8f0b574ff9f9fec71c4265a61 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d2e6b52147b1c3fd4abc174fc05fc34de08a40b046378df452f33368d1edea +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_078/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_078/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47528b88ef574c9437f087e4ad260acd0ce64f99 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_078/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c85a1b95dc8c0cd8180f1d962565bd366f19ffd92c6142eb225d4afe943676a +size 3490003 diff --git a/eval-results/medmcqa/5/ckpt_078/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f16eea36af5ec7b289c014fdd9966d75e397b063 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b04898a85ffdc5bd768495a5456d8179cee2339e2bfe4248e8f83af28f0013dc +size 2888 diff --git a/eval-results/medmcqa/5/ckpt_081/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_081/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b4c1ab70f82ef0d0759f6fc6e77924faba05739 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_081/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c8798879bbd115a09f0c47764234bf29c20b746656e6eb196a11ec5ff11b809 +size 3489205 diff --git a/eval-results/medmcqa/5/ckpt_081/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edd1188856e11724479f6d790b3f359f346a3a78 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ac1a7fdb44cf315e655759f808c995cef27206f6d8888bd93c2fba6cf1beced +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_084/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_084/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..586f12446a6ca66984ed17ebb51c0bb17efe909c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_084/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61545da7c4157ad02a3fe860f5b5ef5b77d2c697f815a320d491f76698e72d66 +size 3487548 diff --git a/eval-results/medmcqa/5/ckpt_084/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a76b29556a6592f07d4c88e74ca24ab27f327923 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a38838910408531baae92dc113cf0deb84ce27e65b46da1af125497ce21cadf +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_087/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_087/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e06252d248b8a360bd87fffb321f3d8dfbdc7b36 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_087/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b3f4420c66cff078aa56bc7deb5e8c2ef737bc88b346bdb2ea3ebfd62f3d5c9 +size 3489944 diff --git a/eval-results/medmcqa/5/ckpt_087/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1fffe4ea83860820f404b3a5f7cde2bf2a4eec8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01618b1695a4ddb19120cb388158d8f7784b50f76d6961ec172f5433633abfed +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_090/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_090/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f760f18dbe7d5013f0889a00998596860c67a115 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_090/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ff99148bc795baa6189aece5c4ffd729bcb7faf0b3658e04413faeafbb8dc8 +size 3488703 diff --git a/eval-results/medmcqa/5/ckpt_090/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee61f4bb57fdd9ec1abec0b038cf8c474cb0d2c2 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc332cc1fede82912b5362fb9615794d1e1e0fa893bf623572d417ac17e7bf6 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_093/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_093/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e3a7332ff1e76bc27efb6eac96da5c6464a1f2b --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_093/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0006f609211b2b52aa7284f85925c5fdfe0af503f0f75d619a36ee67f36963cf +size 3490175 diff --git a/eval-results/medmcqa/5/ckpt_093/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aea829857d4f941f730b7a84f8c78460d1034f04 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:884b41ffaa46bfc603b5b23524345a8a1c485abc1e6e49c36f7c706b5b9a1072 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_096/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_096/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e71c5cc87b1f4ed14f42ddb08f2f1e2f40e79409 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_096/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9e93237314f691f404986791ccb7e140b8b68e6e8a6c3b735f54c7f3b0c009 +size 3489944 diff --git a/eval-results/medmcqa/5/ckpt_096/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c71041f63da954b399447f618d400ee06dbc570 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531ecf8aa897edb00769b55d33f935737b977be2e20f6fde5714e1b5fcc0c1ad +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_099/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_099/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4e34f660629dae4a1922989a92692e81b50749d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_099/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6168a361b2a76e1b17f7c8ed8eb0a34c98e7e033d613ea033a7bf691ccf53fdd +size 3488446 diff --git a/eval-results/medmcqa/5/ckpt_099/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..717291eb5a5cf4e16cbe397d2a42d8c32ee85a0b --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a58a0e1e7cf92b5bc2c8db89ed7214ed3e5992efbcaaac270bdb6ddfea0768 +size 2853 diff --git a/eval-results/medmcqa/5/ckpt_102/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_102/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80b2ea10e93552f5fa542d08b285fe6c1f66cd0f --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_102/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28c17c250943c9ac31d457c28dc2c72425b1a1b0836fbe950c820ac133ae3d94 +size 3487605 diff --git a/eval-results/medmcqa/5/ckpt_102/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa76ca9b4e302da56452e875ca34717dd36fccf8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929b51c9ff001290e386747a616443e589661b1cb1810ce3073bb9c042cce5d8 +size 2859 diff --git a/eval-results/medmcqa/5/ckpt_105/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_105/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4cf12aac9cd9e1843479aaea0df320315a82021 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_105/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3c13187c8a31e9163f880c8bb04299a804a45796b2dba205ec28b14b8fdab2 +size 3492742 diff --git a/eval-results/medmcqa/5/ckpt_105/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3da81195e3fba5fdb66ae89cb49f9e65e598430e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0f97b998b1d5316eef9583389db3ba8bf6b5d51953e2e694ee1ac711c6d3458 +size 2853 diff --git a/eval-results/medmcqa/5/ckpt_108/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_108/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b006721c8ec2f985d39ab24358c4e0f13d7dca9 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_108/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4addd82d175739ba2c458d66ac663c69f511858faa83bece05d20732310440a0 +size 3490795 diff --git a/eval-results/medmcqa/5/ckpt_108/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..026c2455b57e84c799fe86db15947708762d9b65 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f93c2224e3f6b1864b9a11dd95caeca17a2fcf2e74017f3f35597a330edff7 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_111/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_111/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b29af6595553f22f9114172735fd052ba97d91b --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_111/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39af4300f70869e6c4f07f82e30d222b993a9e76146ff61ab79524fa1c40cb94 +size 3490661 diff --git a/eval-results/medmcqa/5/ckpt_111/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0cb5e827d32f04b341221ca715c2f5fd936b18a --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59bd5f1ecab5fb39330c346f34bb74ec227acf58c106e120b0f9c53572ae1706 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_114/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_114/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0686189a53ee83c75560b13c1fb13614143629a --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_114/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2772e7b022e4d55fd0ca14ed750eb65fd61c9e3bef15848d7b4baf214cb801df +size 3491062 diff --git a/eval-results/medmcqa/5/ckpt_114/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..398f4306cd3ae9395d0888f0ede61d87dae334b4 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84632a6a588e97f16a1fd97f798c6ac7d5641f7d970fc680cad0544aeae9760b +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_117/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_117/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5cdfdcca0ef1f7e584aade8c90f80236c8c243e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_117/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3e2f9ddec0d4f7d90555ddf8fc68d1159f31fdfac50a839765157499910c17 +size 3490485 diff --git a/eval-results/medmcqa/5/ckpt_117/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22a339782c91c371d27bdad5664c4ebd92807597 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df9b79b1821e002753d9618d639f69b1807fee8b94a7c66c9f05d15689d95c5 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_120/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_120/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94b11dc1c118a91c2d25d54cdb2f7ddcd284e19f --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_120/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89681563daf50e87a6b2c0f5d21d810aa5cc7665688f9466ba22d48264937eb6 +size 3491668 diff --git a/eval-results/medmcqa/5/ckpt_120/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd3759ba65901ce745d7d49db1f15c3061bc49bc --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d43d0f8ea4b7827957a3d265b9cd4bb3c4667125b9dc2ebf6dea3e3b8c8ec5d +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_123/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_123/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e288dc272c4d985d2c0593de94aadfbff09a1c92 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_123/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7b46a6746eda720abe213418447faec606e4d97a98f24d675293bfc098833ef +size 3489362 diff --git a/eval-results/medmcqa/5/ckpt_123/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52fc386a501b6060a8ccca91b658e1b1f7371a71 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17577a3f42cddd34e6ced31a0aeac0cca828fd62045d80c579475d0f05e1a1fa +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_126/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_126/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f86df117c2d6fd15f854b214531d7ff839167f8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_126/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65a9387698fabbdb2593533bd240c0155cd4708a9507ba35bca9585852e73390 +size 3490344 diff --git a/eval-results/medmcqa/5/ckpt_126/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99786b57783c0c24ed8aab11f068adcd549479e2 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63976c4b3135a86bffa22937eb77fa6eba6fe69f457f20d783697ea50730f74a +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_129/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_129/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2801a119d46945c94097dbf730a2271cd2cd57fc --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_129/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0df9f4120735489a71f258cd690b30c3a180d78b427f61bd733dfab24c89ba +size 3490785 diff --git a/eval-results/medmcqa/5/ckpt_129/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e00a951d734daba2ff5034fdb98ec71c5b7e6b34 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb327d4438b1eb3cb4296b2a24a10b7b4f99e9b27e4b423087222bc6c0ea084f +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_132/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_132/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4a857c3300bddc946f5d2af575dd82f507e943d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_132/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b01b0619b7d7109b4972a026e68292e8f8c189917c84f1ef1820bb014099dd +size 3492900 diff --git a/eval-results/medmcqa/5/ckpt_132/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b408978d6e6b117dee0fc9aebba85e9bcc18b55 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bb62d693a9f85c608c2740e171a0ebd472b2376db6b5fa6ec809f77930c970 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_135/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_135/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9811036a090e39903028b0b28f892640ed286204 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_135/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067ccf4736795f74cb3ec3398dad5432dc9bbb598a6e10f7c960d7adc57cad16 +size 3492105 diff --git a/eval-results/medmcqa/5/ckpt_135/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f922996b92ebf5d519f7aad3bef79629636abc17 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05dff78e22c5a9168678e10f289983d4c6f13cd0d2397eccac0c17cf2e48331f +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_138/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_138/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0a272dbdbd18fc2810fde99191e62262ad13d1c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_138/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb541a68d94b331c72ccc6c3b2a9e37c8dc696b55f3049a31f591e966d72a4ab +size 3492177 diff --git a/eval-results/medmcqa/5/ckpt_138/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdadf33fb4ef9cbe37dd8d35ce9556c12a6eb000 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4599076e70d494194769c1ae030f14c69f8a0cec734397887f22a674d109f6 +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_141/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_141/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f78151fa9a2d9ad291d726e1573064f0533b3d9 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_141/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57de59074d88a2627377206a9247f84bed28296539ce9e31257ba6a1d846d939 +size 3491509 diff --git a/eval-results/medmcqa/5/ckpt_141/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6814653670cc176360421875d0b4734b02d61202 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd592a421e3c2c57ef58e680feb0d3a3a004c56dbac012c32d28806bb5cdacf +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_144/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_144/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05ffcd04fc25682bd7298c11b8a803577a7359d5 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_144/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15ae44d882f0379c5119d52717298d2769102d5d37777f6193b7b47f4e8e527 +size 3492011 diff --git a/eval-results/medmcqa/5/ckpt_144/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e15ec29dad23682033fd7a286666e1e79112eeb0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69526d5b281912b41972df2182ceb5cc39eab87a19f3cd44687b696e19503c35 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_147/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_147/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a8e13611dd60dc55f8204391811b43a26d97f5e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_147/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba292e79d68fffb2000be5f8e67bac75a1935d94adb2e6c6ddcb9208fcbe9f0 +size 3492472 diff --git a/eval-results/medmcqa/5/ckpt_147/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c370a8216aab2a2e4bb9135c14ea87ed160e609 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be5f09ce260c5bcd14a8c61cf0835a4c3ce23216b12d274cacf1d96ebcb4937 +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_150/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_150/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..192e977f63000673e5fe1d29c8a7eb34019cafc0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_150/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5541540cef461bf93caff1bed1f50e31844e74dc64a17f03fa28cfbcaa17ea +size 3492334 diff --git a/eval-results/medmcqa/5/ckpt_150/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d1a5fa9e72023bc8c169f17a1f445fde35efcdd --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3abee8e793e624b55b6c2f6d4c3151c99d33ea73ea6a77c6ffb0306ca0db0e2d +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_153/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_153/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e97d0312d26e3f070dea5eb099c21118834e03ae --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_153/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d8f83f49f0af6d74e5288e59998f5475612db4c9140614c738d6a020be8be1 +size 3492977 diff --git a/eval-results/medmcqa/5/ckpt_153/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72e820b4e5e052ec1828182d723a745eb7b307a8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:916e5aadabbba7e4ef2e6aeab9aabcdb7f2ae7a8de511b24d84e9e4bd1766782 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_156/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_156/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5c40bec6104bdeda82e938917254e02a4ba764a --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_156/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf650aeffd774dc296c683dcd7606b5bb32c946a5e181e95b13366033db3126 +size 3491830 diff --git a/eval-results/medmcqa/5/ckpt_156/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..907a0b4f34b5fd9f7c96ab2e4f9595f08a4b69f7 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d009d05aeacb9e77bfcd72bb536629750a720c54a8064e5ec84804a373b2f12 +size 2850 diff --git a/eval-results/medmcqa/5/ckpt_159/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_159/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7eeb2f8cc9386e0306451bf2109aac9ac5549b4c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_159/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:094670dea36d73ff0a8a639c6d769c31889ff56db7fb03d773b90cfae00cd85e +size 3490766 diff --git a/eval-results/medmcqa/5/ckpt_159/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d912e741f2518078276e3c20d1990de0ff4439d7 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5b7cea4f3a686ea6c35dabbb6664a4967db6c74b7a15ced8cb2ec1c7b738cd +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_162/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_162/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..570338d03e19a409530094636a566931c87cd263 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_162/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:546064f09cfff6fc39629aeca1df45bd98d078afface4d25c7d5267e73bea1ae +size 3490468 diff --git a/eval-results/medmcqa/5/ckpt_162/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c291e8785a0034a90ab4f67bcf1c6616f01e75b2 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef884aa67be6722c4289427b6f80bd828ce4e83bc51abe0cdeb6497608f3f18e +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_165/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_165/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94ad340a1acc2b0f99ab20c36c0160dd05e401dc --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_165/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303b00f0ee9edc625fea2a95924a96744d68bf77667a8aec6c6bec0913b6b11e +size 3491849 diff --git a/eval-results/medmcqa/5/ckpt_165/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b74fbef4c4e061119ea010dd691c1c67dddddf9 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72da23dee5fa16076826d94052190f9711e613012201a1a7ddad7e60235902dd +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_168/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_168/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c19c03612b31b36b9b11f95f1c7a9e5e57679ea4 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_168/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0e317f43630950dddb65f01c0dbcaba648ce129aaa9bf881d5e76936c1af9df +size 3492110 diff --git a/eval-results/medmcqa/5/ckpt_168/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c704736e9ae1fac371d76cbbcd7fb1e19c95da48 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58bf7c5cd2d64120093148d52e32b3ef4b389acad53f70f4b9552c2636df3296 +size 2852 diff --git a/eval-results/medmcqa/5/ckpt_171/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_171/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b014687dca68140baf1b8dd67b860a51679e0da --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_171/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65e42507e8d4b63bcb6faf374a7ff79d43e15e509305f2207cab9bc8b4323d5 +size 3492016 diff --git a/eval-results/medmcqa/5/ckpt_171/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50191c2cb5c7e638d5074cf8c8ec69484aa0ed6a --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6669078c0c3ca3900852a81fb8d946e4fb50a6d49d04d3fb35e9ed2297ed77cd +size 2850 diff --git a/eval-results/medmcqa/5/ckpt_174/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_174/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d266d64bf063b7aca725bc2436290bae3f1af195 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_174/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8935884d4d753f5b6a401d0efe1bf9af6375cb6731304fc9822ba410fcb0ad37 +size 3491732 diff --git a/eval-results/medmcqa/5/ckpt_174/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00eed1040a739fba2978867cecd82806fc7ab998 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae36d4431d4e9a9e9899e11d17f9c84c0fcd4e78f43beabbb5b10a40ba953f64 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_177/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_177/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b081ae677401c979355bd8699e156b78b4f1d977 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_177/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:056ba3b95a92fd6446138289dcd0bbf661b98af9ada2638b1c3ab71f7fcb5840 +size 3492611 diff --git a/eval-results/medmcqa/5/ckpt_177/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29832ec77434dfebe7122b3d56b3ecccf5bd68b5 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bcf4914ecf826f667572f7423422d5ccf3f82974b20db7ecff3c3aa0e587ad2 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_180/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_180/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53ab8a6748b35d48d07fd5d66c3178e8cf3d6c1a --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_180/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6371745b5fead73a35f9cd4244dc69fdec1bf0ea8e8f18fd20972900b12696 +size 3492703 diff --git a/eval-results/medmcqa/5/ckpt_180/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bda256fe094c4bf48ff9b3eab07db2adf53bbb8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac944f1d7791fd9a9696c1d11679a3f2f74f9b589cb40aa3588bf55ca54f6f21 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_183/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_183/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ec3dd7978d40fac25764140b452c2f29b165d5f --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_183/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10768729367c2acd95265e7894415cfabc4153e477ae3700ed4e6673f4715f18 +size 3491284 diff --git a/eval-results/medmcqa/5/ckpt_183/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fe5f725dd4cc456701469204cd3d4c9bce27535 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac89ea2bad9b4cea24976818295a54334b1e2d53b11654ba9b82a7b197ed183 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_186/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_186/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70b0a70efb123288fe25085cb1e59f1617b0b249 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_186/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3fac5c7bbb85cec0bb8938dec921c9ae8411818926733473937fef9dd22058 +size 3493014 diff --git a/eval-results/medmcqa/5/ckpt_186/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b895efa500a3d23d7dba6e115e1c7a4dbdae8e6 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1b2ae218d3d6626b70523e9c02f33984f28260294feb8b93ed5f75b866aeb7 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_189/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_189/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76bafcccc4cc3153bffd0c676ecfb082c593f59a --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_189/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383dc7bec777d4b497fc4fc6d3719e8dc0e675a42e1678c227ca1199dbc02ca7 +size 3493752 diff --git a/eval-results/medmcqa/5/ckpt_189/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a068bb622d7b5178f889f4ecbde7418f653bc27 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bff6d4d84300933a460f186f6db67c056f5e6ae3a56cd197800b1872b9e453 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_192/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_192/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..921c2daf29e936cbd6ae911bf1692e123f2fc263 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_192/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6b8ec285c1efda010f9991499999cd6fca4c7ef8f1ccee168ceb5933ff7771 +size 3492918 diff --git a/eval-results/medmcqa/5/ckpt_192/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b59937997cd1dd09e4ce7a6de9fc835f9ed6bf97 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e335b90dee1e2ee9467af7ab76da4aca5f18713c0007330b9c2629e76db831 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_195/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_195/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c23662a675edd3bfa3137f39fb28dbe33694104 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_195/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e73cbabb924a3f00689a30001e6c1a7898ddd58c409f5f2fa2f74feb899505be +size 3493662 diff --git a/eval-results/medmcqa/5/ckpt_195/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00e7ec7d7f3d2a2f1929c167ad43b77e31ba59ea --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:848a5a1968c5a9689ac9766fe298644ba9ce6458f0b136e1988024302146fbee +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_198/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_198/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..297588828024c00f4aaf3adb3e7c00e276d238d6 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_198/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40e232857340a2ee33021c9f88b7d26dbb7be7bae5887960c9d962423ecb2611 +size 3492905 diff --git a/eval-results/medmcqa/5/ckpt_198/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed75ff902064365ec6a04776729abe41c32a7cfb --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f5077aa77ac7dc30ed2ae3f99d5c5005ade104471804febe662bbfce89834f +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_201/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_201/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0994163226aeec765d618a4aadd2ef6ecf5aa47 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_201/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5b3ee67bfc08222bea5513845d113282f19a42a6897083da8d20484f4c3cc57 +size 3494466 diff --git a/eval-results/medmcqa/5/ckpt_201/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f32fe12e610658ff28fdea63ea17a831f71ddb5e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7713a743e770bf79e47d47108f8c85bee267282b7990337191a29018f9e902e0 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_204/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_204/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32a13b0bc48771e880f48afea828037c7a4a2dc8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_204/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b52ce12023730f81d5cbb8d213eb34dd3bd82d686a8a5ca2b72f6dfbe8cd97c +size 3492951 diff --git a/eval-results/medmcqa/5/ckpt_204/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..975acddf2d7a7c0091e832a3f35816bb07f0ba53 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac501e20661b0ed0f503de10ce31fd02ed260a875b134ce55ab5e242c94929d +size 2852 diff --git a/eval-results/medmcqa/5/ckpt_207/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_207/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a160a9a14e9632448fdd99b8bad69d781936efc1 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_207/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72942cd90f93f8a4ca18f395d262f18a10fb0c2adaf28f6a9d53b4ea335e1880 +size 3492778 diff --git a/eval-results/medmcqa/5/ckpt_207/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2bef79dd7e5f70829ebc30d484f82e255b123c67 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:963d282c81ec8b0583e9ee453d92030b0155614c193a5e9ff1a4712f51438e17 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_210/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_210/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cbcc740e76ca32f925bc2fab8343c24e3425c8f --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_210/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a033a22607ed79c9794bb3c0363c87e31064cf0e5c7c6f8e12f12edab82b8cc +size 3493413 diff --git a/eval-results/medmcqa/5/ckpt_210/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1db04ab9a4bb5c7fe239d50beb2c7bba43a56b6c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700270f70a13337178780e11cb5827a2866fea047fdde51f6f6d260a8822e75e +size 2852 diff --git a/eval-results/medmcqa/5/ckpt_213/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_213/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e3abe46d6922348cfde6cdb84a2d88c657f6860 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_213/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e4f4a9ab98a11352999d553ddea19df774ef372e484a9cd71c1d2378ded0c3 +size 3494611 diff --git a/eval-results/medmcqa/5/ckpt_213/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4d1b774c8b7801d856fbd4f886e58687b162bb1 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41bfa2d8bbd7e5541f2d4b70ccda5a737bf8faabb35ef37aa030254ca5941257 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_216/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_216/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab74b641d8ade29b9a2d7e6adf4006992657ac58 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_216/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:171b9f2bf7539448b8f1cb7e9bbd54a0060121cb5d249b466a45cbf074514e4a +size 3494141 diff --git a/eval-results/medmcqa/5/ckpt_216/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7c03a55c1b1d4d210a345c596f4f2859ed19b9c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f701a222885f8b1070ab30a6a48ee771293fff7548cd2ff8895556a09258453 +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_219/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_219/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36cd46de871888f94a1a19afe67ac6ee1b1ba293 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_219/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:815a69ed37f92ac8af481781acde5dc15f225cfa099af788584753b3ffa00577 +size 3493829 diff --git a/eval-results/medmcqa/5/ckpt_219/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c640ffef6112ade5ba3e9230d7e8d347209ab91 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ee35b0cdda0a654785f09a46c2cf3c28c9d9c0ee2f38163c637198df467ccb +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_222/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_222/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6bb428034bbc2e8b8199aae877a087dc1e434798 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_222/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c04db2f3cb00088f6bed5cb680dcc5fefc2557b511770f0376895f2a4fa5c8 +size 3493157 diff --git a/eval-results/medmcqa/5/ckpt_222/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9d86e18c3b8b0cf7a724588a3f301181dc3d06f --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148f595dbac85c3c09ad55f6a78fb8c617e258a13f90bc97096698344e3e0bea +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_225/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_225/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d402eea786f236a697a0933a69b86187b00b74bc --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_225/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f724123fdd3c862cf398d77fab9c0bcdb327fce9e6bc58a8266bb3daeef7cf3a +size 3493747 diff --git a/eval-results/medmcqa/5/ckpt_225/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d83f68cecbe66e43ea4da492a812864b8144315d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4deb35ad7d587084140f4a152c94cdfe4d4a8933a633f34ac6258444adf303d8 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_228/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_228/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..735f1f01f6cb2c75075c85e8ec3efab3d6355cc6 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_228/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ff928401b3d059c1493355a9acdfcf20e5069f3ede5866c296a1babb9de352 +size 3494998 diff --git a/eval-results/medmcqa/5/ckpt_228/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6752f1c1eb3cf72faf1c73ffc18a94b3ade3e6c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50f456e868113a37bbd5c3c71f650a703a536940f90594e2e8f12a73004faf93 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_231/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_231/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83c60b8e96cc6cb6fbdea7b564a8d25e864696cc --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_231/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4e6cb92f7d1069a9017259d1206987a6bb4886ed0c4cf7c779791f17352d27 +size 3493371 diff --git a/eval-results/medmcqa/5/ckpt_231/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69e5ad91d84ee8370f0e3d7f5f776c55eff657d1 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559cc02641c927f7e18b65727c3eba1fb837d6a253f29ae5de82be8972686fe4 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_234/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_234/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..241a7e403a69a0e40f4969c83934cb723d438092 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_234/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab047945f497af19300558a2538f6de0e91bd8938d79ea392b41496a8784cd63 +size 3494210 diff --git a/eval-results/medmcqa/5/ckpt_234/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8455489cbc371803cc4d5851ff2e03606ecec83 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40fd37dc093d84a2318175a9ee9501a3ac243daf67a22bf51b55d42017f43bd0 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_237/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_237/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72b740b912cafdde99e32bd26592891ac5d27935 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_237/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313eec5f764ddb9e540e07205c67763e7691a38a3b965394d59d971dc3e72657 +size 3493069 diff --git a/eval-results/medmcqa/5/ckpt_237/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a97b35f854a15be449672ba4376bc669932062ae --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2609cab7256bc0f475470e7980e5c00d7b352f6badd6cc3f6e424ea7bf9b29c2 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_240/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_240/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e053735e9c54e2e613d364114c8dd34cb58e95ae --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_240/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e94a3e7bf612596caa0ef878c59e49b37d6e1f924f335c6d325add3ae1bcad0f +size 3493286 diff --git a/eval-results/medmcqa/5/ckpt_240/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60fbac861ada706162f94603ca4aa5eb3e6aa3df --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd546613ed74515785ea2401692c8cd66ea5f135831c8b143ba95cfe752ca669 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_243/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_243/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6836b3a0eb8b63e63d02dddf1115fd4ba8958ba --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_243/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b1d5920b2266df2365dd5e02d9c6d79b64aa79e2926cb0f9f1219a8e59efb7e +size 3492777 diff --git a/eval-results/medmcqa/5/ckpt_243/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..493797f1586de801fa955b56905cc1a4b1ae3450 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea8e04d1b64e5e864805d1ae58d5b6affa54b9d4776a2ecb1106bcedbcf0c507 +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_246/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_246/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c447e079ab71cc9a2aa8a8ec09ff5abc668e629c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_246/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b71fff06d75eda11b52a027e6cfe50611c220069c2dbffa6e1d4081c382569 +size 3493122 diff --git a/eval-results/medmcqa/5/ckpt_246/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2919db1cd93729bbecfe0661e8eebea3bd997a65 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63db47bd28b75f2b27860dbb8e881090ed6033bc08f773e47d762117bb76e03 +size 2884 diff --git a/eval-results/medmcqa/5/ckpt_249/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_249/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a474e50f0ebc7bddf076d4e158fa5c2f5e75e287 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_249/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3581128eeaa415d9be086edf4c1ada3c16855e6ad167d5f9c71421717f80c187 +size 3493089 diff --git a/eval-results/medmcqa/5/ckpt_249/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cffacaa0ba56a8a72728218bc2f9a852fc48293 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d8bc1b8ec3a83d296800d53c54f75a8ab7eb3f622a7b1a28e7e888e6623c5d5 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_252/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_252/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..171d6fe852a7dca5617e2dd4e582d4bba33852f2 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_252/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9334b88c546f982017b4cd18d233dc447ce4e95eef9251e353bb4c39e26c2b8e +size 3494507 diff --git a/eval-results/medmcqa/5/ckpt_252/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a76f0bc9850e25525e7090f9fb4ace1ffe6af60 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87cf3b4e799d627c24b2772b2ef5efe4fa9ad3b67717d5e007470c1e2d3e1e51 +size 2853 diff --git a/eval-results/medmcqa/5/ckpt_255/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_255/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9564e4cf0204b6fb539f956d0c375f57d2002d85 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_255/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe0231bd78935567a07139fdfbb032c998c7ddca85fbd5bdc081b97bbabfcc36 +size 3494802 diff --git a/eval-results/medmcqa/5/ckpt_255/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c90921cfb8def29ed42b6fe41721ca137a332380 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed84f3a29cfc739849fe82477e486551aa287dbd4a0621a3ebbed2a16643e01f +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_258/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_258/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdb7896c5297bddc79bfcde051d09f18c9e28b02 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_258/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98008f1bc3d377eb625f5ec311c604d8eebe93b65a53074fb19000a9c6f61d06 +size 3494880 diff --git a/eval-results/medmcqa/5/ckpt_258/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ab642a23fee2285db7aba433e324a52e17d704d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e28d2e9a4d9ba26e1a60de6401db754104684b956ca5d306f3267983d7274d41 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_261/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_261/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9dd643bcf44666a947b397d4acada5da122ff0ac --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_261/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c370f7b5fca5257ac1fc61d93457a88f685187b15f593defce65f1dd13ac95 +size 3494745 diff --git a/eval-results/medmcqa/5/ckpt_261/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..941a95dcb1a21b5dc939f95a6f0018c5e1e490e0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22995651abd9f4a56ca63af9296632465ae5fb2d9524298690ed6ff367d78cbb +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_264/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_264/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa25c3e0e5302dfa66f6e0864e34b098a4159bb2 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_264/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2027a673fa7bc1cd75c657f97714961745bb706bae6d30e42ffdaaaa9f6ff4 +size 3494134 diff --git a/eval-results/medmcqa/5/ckpt_264/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44f6635f5eaa7398e7fb8b9a44dd9d3f0393b5c8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc124951e501dca0c0fd4c8946a26f159e2cba658029954b27b34e3df6812cf +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_267/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_267/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eaf2e08597dd0f7e30d6409e834a03d68c5f74b8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_267/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f0e03ac60553d6afe3ed1ee594891620ca0e743e24e190f5a90d1410cf96a4 +size 3494840 diff --git a/eval-results/medmcqa/5/ckpt_267/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4347e9bf820f77577d15f33f389873bff4c1fa1d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca68a4ea51d4121e5e8841840bc0f72548338f7c8f95e3b5ed3d8418d4ae3065 +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_270/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_270/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7759c0308def6cb0149f440787545889d112a408 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_270/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b919b896dbc459ea9ee603b56d2dda704fdffc5f9666bd5cb54c20fe1619f90f +size 3494245 diff --git a/eval-results/medmcqa/5/ckpt_270/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd534e96cbf2b9d206bbe3ae57647746e3c2b325 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93e07d701a3a9c6d19832c516dfec993fdf98c6b8aa94653c3c4498eecbd53e8 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_273/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_273/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06bde80753ee2e5f69a1b71d18beba97375e600d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_273/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d73f9780c08d4c2d8a4c2b2c52ca93977bb0aae5aa95d9cca63ec0c99c13e66d +size 3493828 diff --git a/eval-results/medmcqa/5/ckpt_273/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8207d8aefd7a64fee77cd412d57324b1e717d680 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9454d2a7b6c4b220fa0739f328bf1130c31bbc8805e0db7a31afcb2aed1cb463 +size 2885 diff --git a/eval-results/medmcqa/5/ckpt_276/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_276/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7ca210d6822ed559a95f8df482d1d56af07a6f2 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_276/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a141b72a894b1f57f501bdfdea731a84f283560bc8656bc9eca88389fa17c65d +size 3494259 diff --git a/eval-results/medmcqa/5/ckpt_276/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4ea101c97b9e1cfe2abb2da23e5343751ddbba6 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:528363e9b7093980b5583df3748a2c9f3378cbb80ac4dfd8b898c38c5d7e2e8c +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_279/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_279/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b329aba6e0e615ef434b24e569f55076176b817 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_279/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e54854b0899ac2634f05aad476210eef491a1afa44858e0ddce430c6456001 +size 3494794 diff --git a/eval-results/medmcqa/5/ckpt_279/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96fe79a7c24dcdf99d53c034db2cd8b359b083e3 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df92ca392aa6aaaef9611be243cf9e26c449e131a5bf513ec4e59bf06365283d +size 2853 diff --git a/eval-results/medmcqa/5/ckpt_282/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_282/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5c52a7cf5498436a1b343f2d0f4e8e06c2329a8 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_282/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4daed32ca9ba925ced0cc2afe1dfbbc881c8bdbebe711047cb989e8c476997d9 +size 3494472 diff --git a/eval-results/medmcqa/5/ckpt_282/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d4944434372530b028c3a0f6c4d64991237ce2d --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1972beb13c582eea95cee28bbe53822ccdeff92a55f00e0c803ba24287a78bb7 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_285/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_285/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a512eb46c73f18f604a0629faabbe117088f58d4 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_285/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30c932c8ed2605c5dc365aa9c1fa726c66a2a9e1a890b5c63a18b93b60c71f8 +size 3494842 diff --git a/eval-results/medmcqa/5/ckpt_285/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b9f0c5f8d446759dd300c891cc639e98129d532 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca4590fa4655ea8c7d9d0743d601bd4a4883525ec1b82b0097bbd4692964da5b +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_288/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_288/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fff5f27da8a42bff5f1ecb1921ad66e357b102d3 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_288/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9caa80c371ab96211cb1e372b7c7d6e79cd534ab78737468616b0d27f375cbc6 +size 3494956 diff --git a/eval-results/medmcqa/5/ckpt_288/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9efb3001407c3370d33b0154aa14b3763c0a74f0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1687aa292411e65e10678593b85aff0ffb86cbfce27b79fb416fbc1d48b4f0f7 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_291/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_291/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2759608116d91d788ee98e91356f2b8dfbc6dd2 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_291/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1af4a920d7a9874365309ca7309db036be6619ef684fe0e092696ab5f4cc8cb +size 3494163 diff --git a/eval-results/medmcqa/5/ckpt_291/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01c59936257fac661361e683f31ebf4f44734191 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4c103b69940f062ee8f242de3928fe37e48feff93b9f39fb53bd5146d41fae +size 2853 diff --git a/eval-results/medmcqa/5/ckpt_294/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_294/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20dd61a5e6b01bfe544adbeefcff7f60095ce0d0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_294/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f2db87f9a9ce9ce1cede9a62bee7c5e33d81aaf8a48f0d9cb429de280729e85 +size 3494261 diff --git a/eval-results/medmcqa/5/ckpt_294/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05e2a3183d22cbd581bd455262bacaf3bd6aaa92 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c97597c48fbd1f0663c9016eab1df92d24087c9b5e4db85e390b0e9263db46e +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_297/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_297/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b1c54a6c89c15230a5b3236a8c213bac138d03c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_297/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:404334cb10589ffc25338757f2b6f76ad23b756e3373b7fc4676ce6554327aae +size 3495083 diff --git a/eval-results/medmcqa/5/ckpt_297/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cbc61c726d094a63e05ee7f406d045b55fde986 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b2c433ebf1cbf252ccb9d855c646c9629123ea52eff2ea7bd3538bd01663b9 +size 2853 diff --git a/eval-results/medmcqa/5/ckpt_300/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_300/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24680a3d99c18ec17d98d206e3a5a7c107242f29 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_300/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e777ef399ce9996db949c88322740e7343e56b98dd4bf2312330f59054515ddd +size 3494763 diff --git a/eval-results/medmcqa/5/ckpt_300/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5618b469d2678d08800139dc233449baafc2d973 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440c021deb1ffe8193a31fd3231b38c0071b03dc7b042272554832677946242d +size 2853 diff --git a/eval-results/medmcqa/5/ckpt_303/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_303/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..949a7d4d90d2f300633182b3d64dc98daa1b7b42 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_303/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:314b87dc8a70e60c42bb61fc4b0e3d52d2f363358f035e86cee90aef3f1d3689 +size 3495054 diff --git a/eval-results/medmcqa/5/ckpt_303/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8c3e27d9f654e64acfc8b4ce72999da2f0e54ce --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b0860c61edbc5196029bd5bbb677210c7bd80e76d19ab652e34981bb2e664b +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_306/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_306/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7c65ae7f5def61bc0118e604216b399dd5bfebc --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_306/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc3e7bc76eae18325a26e415724e38c91bf0e8c42d4b9c113867c273b4d30c1f +size 3494905 diff --git a/eval-results/medmcqa/5/ckpt_306/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0940a25e0d37649b1ec763647d3a8ef30c946062 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce446c1251bfaabfec67e8bd8710283963790ec1678258c5285a4454425c92ad +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_309/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_309/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6558019309acaafccb138ef0ed2784e21b7711c5 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_309/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49655b7b0c7d4ae9ee4cf7552767a8b62a139d49da28d94b3f1d2ff7be8bc59 +size 3495725 diff --git a/eval-results/medmcqa/5/ckpt_309/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34f2ab6e0af05161e7a04e1e4a1e6341b6fe8c3e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099a4a84240e991209961f59089241853691065935be4333cb88b5afeb20d396 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_312/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_312/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e3c2e1d411ba4a71f782095e8fc27a4b646441e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_312/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bdeb8c424de24f2531cdabd6c179f0c167d66b4e886da9e88339f9c7638380f +size 3495332 diff --git a/eval-results/medmcqa/5/ckpt_312/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45260e475365d02582a2bc9df5fd2deab56065f0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2904bb2dca88fb55ad294c1939ac22a76651a1cdd07d313a770e0e93d52576 +size 2855 diff --git a/eval-results/medmcqa/5/ckpt_315/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_315/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db2285488da6fd591f7681ecb9193fc61bd37e58 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_315/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:336ce19ec4b63c5065f997ffc28e7b1de7d0fe101611b5f649274adf2b2bdf85 +size 3496170 diff --git a/eval-results/medmcqa/5/ckpt_315/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6ca87a80bf2cb794aa6962ac8c28499655a26dc --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c9d4dcea75cf72724d7000d59477fe03f152ef51e281cce8c5e04557c388b6 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_318/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_318/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1581e2a2058db72042afd184876b114a793b12e0 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_318/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870de9609061cf1c098cb77916569aad18cca1393f5ee4fc6603d102a95fb1f0 +size 3496157 diff --git a/eval-results/medmcqa/5/ckpt_318/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8609ceab1903b32825c49f6308d5a2a67179f6a1 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:223ff608e07fe0e95c2d491b649e847fcb1861ecef737635cc437b61661de212 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_321/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_321/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55168518ff60d77ee6731489e8d8c24c483ac5ee --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_321/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531721e91d62b9dbe8dd6de195778a5725359fa170677e12049b756fa8f7d1c8 +size 3495765 diff --git a/eval-results/medmcqa/5/ckpt_321/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb616d15b763d41058e62d9bdbbf4c5d0a01247c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220790855435ca04a0491aec0c75f847e6d36e855d65d12d986c6080b9036a1e +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_324/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_324/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63a6e000e564b3a97cf6326dc3be5fe85c419880 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_324/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7145fac5aabaddc9b071871899d4b3fd73523a2fd4b6f31adba1bea386e964 +size 3496433 diff --git a/eval-results/medmcqa/5/ckpt_324/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d04a2deec7e75150fcce784a001763078519fad3 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a47ce06d48d252143d64a5359f0cae8065488d53c09606432c90263f621786 +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_327/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_327/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a41ea92746c87f59f510aec5084dec00928fd29a --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_327/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1cbf928c60ecdb881b0d4458d719f3b4736c27585b490b0305b3c8db344351 +size 3496548 diff --git a/eval-results/medmcqa/5/ckpt_327/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc33d81576f5547e09f9e1e9fa31200c68be633e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f756f41e4017bae365a933cf81fe08e36c9f8ec0647d02a369c1f9ce670eca +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_330/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_330/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d186cedf675082ab9dda2a3695a5a82f6b073113 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_330/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f9147d7447601099fec738537f904120d09f592a266c3defd05ea8165661d4 +size 3495541 diff --git a/eval-results/medmcqa/5/ckpt_330/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b748d70ffbe52cefed13e7073ac4bdb735583ecc --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79ae227be650ea3716208674d9cebcc9d6c051f4ffb501c1d08b0a276c092752 +size 2860 diff --git a/eval-results/medmcqa/5/ckpt_333/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_333/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..496a0c87af95ed56e9a7fa53bfe2f4a304cf51e6 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_333/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93393e958ed7b502a59a1d8811828702103850e4ed58f8af3dc6d9c9d28cb85a +size 3495778 diff --git a/eval-results/medmcqa/5/ckpt_333/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2463093c6e7dca6ef6475fa517ae0c0add42c668 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01003503b95b0d5ca195e0ee1bc8a5a70c7a0719646c3fd393b4d482cb56ad29 +size 2854 diff --git a/eval-results/medmcqa/5/ckpt_336/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_336/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7634c0979b152e71a3c9f62083727b9bce77f1f --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_336/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1411e1aef5272a2b122f3e21dcfdc334bd1f3d1ef5fa01e63cd82831c8d8b6f +size 3495840 diff --git a/eval-results/medmcqa/5/ckpt_336/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9dd16718a98ede24ce91c1f7ef54e482a0b59b40 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5752b2b9e602a74969d99eac09636320d09bff4f4d19524bb4d5f0a9056de502 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_339/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_339/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb302ec542a75e9b77e7def099a19d726a47d1e3 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_339/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9d5d82e76b062cf7e0ea40a44c3ab09abb9732d12ab9868ada527dfbb6aeef +size 3495621 diff --git a/eval-results/medmcqa/5/ckpt_339/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f284588303008bece197adc30df194a0cb0f9739 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86930e63ada87f1abd8b4fa454f9221d745dfb06897c7054f9cae3d18bc85cc5 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_342/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_342/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cce36355f54a692daf2dabf0abf206116901ff0e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_342/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4e473e23bec6e3e0ced4e4c867306d0f42adb6995bce60d74f484e29eea34a +size 3496296 diff --git a/eval-results/medmcqa/5/ckpt_342/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b2951d06a6abe477e7f03095383ef374249f3b4 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a98e856b6080cf841875b5c868b64572afda5f4e799825bbc008c7a5b001f4 +size 2857 diff --git a/eval-results/medmcqa/5/ckpt_345/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_345/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2a71a15246d45d6b2e1dcc0881863468ba432c5 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_345/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eeabe205fb926301aa9bdb34a3df0be4fefef2106c665d508ccbe58e0ef0f3a +size 3495459 diff --git a/eval-results/medmcqa/5/ckpt_345/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cfaa1c888603bd46f1382627942aa7218bc769c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07dab7a41ee2ef22b14e1cd2ed576808a5a94469829e2e4de112a63c117bdc28 +size 2860 diff --git a/eval-results/medmcqa/5/ckpt_348/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_348/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..084428605a1d0e9575b26503748e901b6cfb6921 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_348/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54038d591089056188f12d0f86f6a46ed15b63e265c749a95d4da7ff46a02b47 +size 3496045 diff --git a/eval-results/medmcqa/5/ckpt_348/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54e00207ed2d2bfabc25fd7840c4e55de1bb774e --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48dc514de2b746c790fe91265325c203ac0a7648b7ad9d05935f06471a79f5f +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_351/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_351/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..002658f968715725a8b3835d391fde60b5671b47 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_351/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36bb3bbc8d98510319eaeced43e7470711ed95fa0d737fb50a46b3da45652aa9 +size 3495992 diff --git a/eval-results/medmcqa/5/ckpt_351/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e09b1c2311d69351b90f39ac079b793cf2fdfab3 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4764f9aa14a4c71aaad57ae9942a7486c047d4af0d1e4a9f1f31c747f3837023 +size 2859 diff --git a/eval-results/medmcqa/5/ckpt_354/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_354/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..889d545a9544d8b5c9f9c69666567219322936cb --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_354/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b154ed7f49cf25a82f0c0c5572c889988882feb1b7a201ae958d780c5ca79476 +size 3495804 diff --git a/eval-results/medmcqa/5/ckpt_354/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55a884e2655e04644daf7d9d11e6cb674766a341 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68454f5dd808c06e69e900fc79cd553e2c7ccac95f9fb646c8a208ce0b1ac08 +size 2856 diff --git a/eval-results/medmcqa/5/ckpt_357/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_357/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4253822a102a04e2d0fbfcd8f6cc9752dc7930c --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_357/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3268f47ef8ed22d78d54ee260dcce5b6364fce6a4fdc8499c56d1bbfab51d6f3 +size 3495888 diff --git a/eval-results/medmcqa/5/ckpt_357/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..130418a625e51e62ad848d4a6e260b8838f665fb --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a0d867ffd399902ae8b356048fe60229c0acde4699b01c59c4aee563053f47 +size 2858 diff --git a/eval-results/medmcqa/5/ckpt_360/medmcqa.jsonl.tar.gz b/eval-results/medmcqa/5/ckpt_360/medmcqa.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c88f742442ac57ca657a784f521b35d002d9a467 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_360/medmcqa.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb2cafcb7b0c6ba21c88b545335873d3488e8aa78466e13f13e0ef24d76ab101 +size 3496168 diff --git a/eval-results/medmcqa/5/ckpt_360/results.json.tar.gz b/eval-results/medmcqa/5/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87afbdeb04951e20c52adcf61e8c5e9c50c39738 --- /dev/null +++ b/eval-results/medmcqa/5/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:845ab24e6cce4f2bdadd922861316cfcbc509c3d72bb0153e77092cf7cf63d55 +size 2855 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..233e9a1b52807115613222027ed9e37eb9b0a340 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67f88aac2fbc7aea47783b96497b8e86bd0610841358baf410f60c48ec0ebdd +size 17013 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac1ea723dba08ee9d160a7b78a84aa43255a106c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14bd9558284b855ec20eb3aa8ecd5b9d09b002d9e38049c94d5c67f98f6fb056 +size 29512 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0ad48d31920537395ce4527528de19017790b75 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d2445a01ea485b266060fc9a0373ba02c2b012f5757c66a0dc3e07888e4c68 +size 39592 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5edfbf72e240d61509eb44cf5adc86feb91d994c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d58085fd497150bdbef2a2afa19468f4f10c83710ca666111c8a36a674dd96e6 +size 26539 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..637d61a4c425f24aaf73591c6825096c88b235f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7768636469b333759731120c63e3026c00e3843dd452dc32ce84c7b0a8458354 +size 60650 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea7d6688f2f12dcd7fe06394e193904ea5f27795 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69be053f46724efb40d7c01b41bb95765c9620a089069474ebc54f485482701d +size 40082 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f721b3e76bcdd5719a4b40bdad28da2eff9faba5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c397a0f378667fad9c082c2a9dba66b2733497396d65d346b9da7e431ea39418 +size 23669 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4c5df516f34f0a852032b683f9719d987bb64db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a02cc052551287a8dcf0e1b74d875a9de978b32c4166c6a1c142640f53fbe3 +size 30973 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5682fcfa07cb48a572eb2c03ee9b200397ed8f73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6407f21732de537d2975b6418995997dc1ac1cbba6f2937b283980d81cfb022 +size 22882 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7e6be00583cbe413aea07fb0eb320a2fbdb9f62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c819cb2973194b78c4db82bbb0fad6b079ba3afb41ee12ae1cb2498d15d25d2 +size 60530 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed28b3dc0d62926d4a2014a48a82c9603a546b25 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854a2115d4735dedd53ee70d07461d73bd4b41e3d9f6cc0e8f3127dbb2476cb8 +size 25603 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4c2082f7efabbdd5c4b9eab473399a99c92926e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a4fef93e306559b6040845dcefa27061894f842e034499585b262198dda153 +size 25610 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67542361228e0562c0973fb20df1f77d2d2b9337 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4eb67fa93f976924e9f93b2729b6aaa375d90e162631b44f2381456d82fe16 +size 46068 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acb2e448845ddea649bab55456f36a5d5c0bb3f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f23cb339af0c1d79f453aa6a3f9cc3799627aaeccaef799857a672edb200cef4 +size 31339 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8e77f7b19cd54a9b7b3449df31c704c471a27e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ed13317133310dd2cb7da82c2ac5e57cdf3b73f48455aaca5f081c6ecee5bcd +size 28544 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e42368f229e69d32da3ca2df8c996ca1ced04edd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb661973c197eccbbcf00ec8158a385816613f52fd8da90d30057d8d782a2194 +size 74198 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..134075a168f076296a14af81b59133c24befcc40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8af64e5408b5ac6119d3fabf56b106cfdf51ecc077fa11e9bf21e5aa0dc741ba +size 30031 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7bd726b80f595c72c2c9e3a2975f3560f86a04c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e13cf54226949ab7409be6cc90b8882dcfb2fcc2f0be1c94036ba199b814c2 +size 19017 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee691a4e61249d4c9a1bad7dafa81b6f1fe9b5d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2789842f6ad347bda5829f751e77e5ee8ba89161db3c02b90b02d3da7e4726f4 +size 87318 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c27e11290621ebb56b2dc3da785c55b9e901af6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788066327a46f093d3db542072f73ad007bd17a732343d00f75a9eb7dfa56baa +size 49682 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..691cd831a80383c1d738bf7e6bf75554e1918b84 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649ca8c3763dd90428d99dc13f1abf122fbd2174b9230f60fa742f00d84d5a1f +size 31421 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78c7f29781df3cd5db07cb5cf3cb9d75bf538db5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26aaf37c1ffda26c0afc7e9e373146cde9b46e71f08272a46335dd6bca122357 +size 144776 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfe4168c55c5feb3622fda052209676bbf9c024c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f123f5b1f9e31ea89f7f0441d6e7eef27cde05dd016632c41c96cd1e94a525 +size 44162 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9234c66ae0fff4a8dfb3c053d59f993f775ceed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c50c38a22dee2065a34c91bb1e8f87c0215b4d31924f2cc229c42e0e0da8c71 +size 54006 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2350b60859c9f06ce79a216a19ddd32fa3db2369 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00a53d21cc5f8d146aef01c76e0615b546179907975497bbef6256c4cfb7e4e8 +size 92075 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc69fe65ead0cbc56256220e9b155a547bd02e2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c6889847aa0fbb0588fa2cf6796e617912e5437786ca2039b96029d0acfad1 +size 56979 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e51dca1fc50b252bbbcececdf8f77c02bd4b82fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312198688f348a1cf2825d96b4042312437e96db8de523c6e9692a49f1d37f2e +size 57950 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64b820e98df1db56408b56c4e45011288b9c9ede --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30de1a47b7e2249cbbf17581ce394fd3dd10909e18b0999dc4097002e6fcda17 +size 42717 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41ec904dacc064d278a844ba6dac35c33f27d23c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea692821706810eb5f762ce9bc953d4d3e121292b1e770a43a71e281eb68b452 +size 142705 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..449e67061706cd832cf0d31cb9660108ab2c1643 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:509c092a326e38816308ffbdcfd43b5049c851ba1b747f030210deb011e75e8a +size 70861 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0158d883484cd4d2f42add85f003f00b94ffa493 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e3751219feefc92847176c240406ed99e3ecf00a2b2e6448383c6d6930c6a2 +size 161254 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fb99e419423c8f1dfa0d59fe981670f7fb2f111 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7fb1ad481e39ad3df2b70a5de99519720e82f35cf7252dbe278ebc6ad88b67 +size 208618 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b2f18adec18ca236720df4a9629035afb3aab4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4db7bef1537a25db8b65848e71af89060b14b864ed2da933dd9d72613a364186 +size 49433 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..beead961796a1c65f451b903443d3a270c50793e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0fb9aa7e06500f456bf8d1ff3ab5a288a3a620172ee0e1ba5e88a219317e88 +size 31293 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..283bd376b7b81c014e4f478ebf32bb998a4ffe83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4e5f1b9c021d591c01b759933a7d2d9538244150de3f33a74077b66da119f1c +size 35788 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c29aa7994b3a54e1357b72d4f5fd0c13003ef86a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df951523196380b60f3354e80001bf7a4ba9b06055b1f84336fc367d6d630b4 +size 29284 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5093569c80ebe74c289157cd15a492502deb57d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a4a821d882b72fbc958c4d56c8ce20a6a0ff4db2ce660067d92f5cd023c1c0 +size 40006 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e52d540d7f0925b3b77ba0159cd47a19e25f947 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4764bcde02e39e96e616bfeb22fa2a82be398ecbcb2d1b7a88395e3577ad19a +size 26949 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1270f516e37a257baa08120d57c834c4dd209c80 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd6390e5f7a7d05cda18bead69238b2c5bbcd3739696109a4585f50b7e72c1a +size 21508 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..873df40c0825db8a4cc6f2dca1929000bd168b5e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3c823f51e4980f435dbb778df1234bca47685aa0eed0e61626846b3936d9409 +size 56932 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbef11300fd16faf843a9c59d7f5a0684e0926e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a34e297030e26b1f11783cd9efef21139b7b6278fe9bedfd55ae65386ca3d99d +size 22079 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a406a41d06759ed42eae242f33dc976f4244759 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2096528a3fd10db2d7449343fcde5322e21a975cbca60f224d7f36735812868b +size 167837 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb186112c2c4142c48c9ce2ecac03467890949b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92196b308ad6582e82305cbd9da7370ab3e5f99884073f3a3a1119d0d6bfcef0 +size 89561 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1521929b33912594fdd65083ebbea0373f44f67d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66dcd634f6e03ec2750afe2f0f815d1b08f073f4278be25c210913a835933fc6 +size 154329 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c11b9e207cc5180145ca0329765dd99321aef2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f16e06b4e04e89a0300942133a9ccb9ad056608e6e4cee6640883b910516f0a +size 78817 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..261712c11e9c311d0a801e7a9ade291decd6898d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae0152ccf73561dfb0be342070da1d61d27060af1f48cbf10b6ccdb193e5ff4 +size 73705 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66fec707ea464d97fabf50fe7a75d8fd06214a16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d88db2bab778c86399f36538ed79c8d7a4d5d3bc6628d26608e129532518a4 +size 81901 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ac57d13f733b2fb7614faa390ff20b2fb3d6edb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c7b963a96d4214ac2609bd734c6b075cd7ec43ae9662618160919fdb832339 +size 89535 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6f93f77cee6c68697980dc051c2f2b0af2bd402 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aa01512edddbb54014b6268cdca1f3c4e2e927a221991e28b15fc7db91cd8e9 +size 1001276 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..524642056e03103d27e328429c97f25149426dee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f5ec2fbe6ae451e097ef5d732fcdba4621736afee55c26b3846f34799f8419 +size 132394 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e8720349ddf993e2c54050b825ac1686321b5a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:438b5ed83a32127b63c1ebd97a01f7a40893c6de5b2b0534228088ec8174ead8 +size 181666 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e65346b3afec846c4efb9761e6ac232e75603c9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceca1ff7066bb1d1620f01ec56da089c3859fac0813424ac82ba4e54a2a53b65 +size 27256 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01d430df1cde02e16da08191cfbde0c1a69d3a4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d904b0f77b6dd25da07400d226629fc94bb8f6b1a207bf5ac160afead487c57 +size 119785 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1221d1192cb6cb2044aae15754b02a01823633e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e598c575e9ea93adb6c7effe7657e0b639b8a3030bee338ce4d4a0ef61cd936c +size 57648 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ce1b1189d47b0bb8d06fd95503aa1c4c6857788 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135c7795c444974f749f1d9b980bee851c0d922da625300281aa37d6bde446b2 +size 25198 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe8ea7c3c65d82ba56f6ca84d1074629b2cf93fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44945020fbca532e704d65a891e6d9677c80d34f40fe26a43cfcb236d671ff3d +size 39071 diff --git a/eval-results/mmlu/0/ckpt_003/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_003/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c3f88879c06282811f079f49728ae0c48b10f2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e37e994daec0bd61209578d193a89fcbdba51444137bbcb9f66495ad4c70fd4 +size 32726 diff --git a/eval-results/mmlu/0/ckpt_003/results.json.tar.gz b/eval-results/mmlu/0/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dece1d9af319f62a5a084fcb4260537e61ad09ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db21075c94c3f15eb60e1334dab5a65e11a326f36636aa1a4c88f9b2a9bf58ef +size 7613 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb1997c9ee26ce18daa4153da1ba10a4047203f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a8702637374bd2eafca5aa04de6498ca9816fc880613b2dc4b6f9fbdb30b269 +size 16976 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab6b2d9e1f6cddd7d2c6061090c70252bd2e409f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf275cedbc76ea1db17cbf42007bdc878097ef0c116ee4aec38ff426d63fd361 +size 29393 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92d331636153b40a0abdca918cf9f1aff8c2b8bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932682966810150e72e8d6198a32bb0b140d6936ec6cbcba7bae3d38a3315211 +size 39411 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a72f51478c451f254848192fda820e6cb1ca4e50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b91354488c09bef92f7c0a2fcdbcc9b0fd1a4dac0058a7d922b165fa7aea96e +size 26438 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7686e03b8b0fc8305d3a81ec6bda709a50939491 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b9803f4affc5958485e86d1b4ecd4b1ab90d27ef7d10fc164436a6c07931cd +size 60419 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61b1692f06c6a6d400b4c643f7d6eac1bb806703 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80f80cc69de259e8a2155042c6c0c79d449d2bbed67275dd325c0185a7a74b2f +size 39965 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6dac23095a570815294fcfe71363bd80a551662 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cb916540b38c320d01d5f10d52ded9a4e48c69b1a45a9650e74ba3eb7c5aa3d +size 23615 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9dcde3b779aa46af45b5cc10970523905479a4e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d05cd3f1600faa67d1a07f7d6df1a20988353fa34cb4cf67972ad4c550c794 +size 30918 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7448f0fb58f5a9e4ce9256027990db807925b4ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348eab636d3caa2db34d0e5d1220e284aac6dbd976082963dfdc7687e89504e5 +size 22818 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8aac3f735be9a7f75ae7e7d5eb483b5f48134e5f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda71df255218ef302c03ad6d15d1a22725ea9e29b005dccda70b92cc691a503 +size 60413 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9329b56c1da7ffcd3c17b6d5cc73a2d792c517a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ad10151675101a415f277b16629e19490928d55eaeb8e52bf0c2cf6eb17aa7 +size 25542 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6de64c241e14f51f737c3ffefc4faf854d690dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a15c8075eef50ca39f2cec2af390721d0541abae43047cef88e39e8e3d66d8f +size 25517 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b7e99c2caf2480630030445bb25ec0585f7d2ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5d385035b22a7c033a5b9c7db9ccb067b6b311f06d7f252ad06c8b8df2bd578 +size 45901 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3951bae75c540c083bec47a131c1ca46d8994f60 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf26622f5f9e8c2658c2f796f506432ce313bd377665362eb7ca097a9e88209 +size 31303 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1ea770c681c2e868b2a780274da25eacff3910b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f50ce790b606be7a90c99f00b7831d9461d9cdaa3974449ea2533e641bff4bb +size 28422 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dab9431329483f2421583f7d77bd86081e2346ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6921701b47da9cfadf535e3a794f95b57cda546ac2af301ff6d51a02d80e9a5e +size 74092 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..743bbf8022af257df90fc9960acef909d9409f3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786ca419242f723a99c05caaaa85a0b4f7258f94447dd446b660954de6d61d9c +size 29956 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..255639ed6f0af7a006c634147357938bbbe84330 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac534d2cb5f05836373683d6b160a226e88b1b0653e133a62386ab2d1763955e +size 18904 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aae0e5788fb0603ee380b4230cb5cc9b6ddbd004 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea75cb6f4345d6e613323a8073ebf5abf37ed973ef7d6dd6ed75f5650173171a +size 87163 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efd6466443750245dee1d1ede054f63670ec7335 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b78e4902d172fa6cbe38e424405faa628add606f683a41c1f13d69a256611c +size 49481 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0a7c1b4abbfa51de6d74cfbdd9c1893ed7b3bb9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcb28398b5f1fe2af3b50e6747d1b8dffbead16d5638f9709187658f5c535de8 +size 31361 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ec2d77415fada9db9543d52305e6d1fc421134c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c94473772461f399f2adf338e69565a76c3314653b3b6bcad15d463730526a +size 144615 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b79c62ca38f2ec14d17e919a7d8801e19261bb10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b861fd75ee80e968117221265bce053c6c963b2467d6f00d75e7b3055c51e55d +size 44094 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95ddd34f50de367393c72c847f76232133f3fd2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:109f384d32b9941859f3474cafeaf7f75e66be33d3ea5c5f2167003b001a9fbb +size 53877 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7992c47ac5534d2ccd1a37d657a0a25168817200 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c971691e172b2ade25cb3cf8a0478db722ef1be9560e0b4c4a2e4a67dda7802f +size 91933 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6393ce0c5e92d74a0f85b4f64a725c7e36cef340 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc744f273765048230dd0bb2d5440967288fa3c36dbb9a2c8e4e63a4d58d5e3f +size 56898 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93278180d00b9371bfa7255826cb709bad26d213 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72547b8dd391cbc879e2b568304f6ca98db8dadab8382a37725bceed0cd780d9 +size 57873 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40bf951554eccfec97d4a2e8b27ed874450010f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24341abeb7797733cbdf6921046e805b0c7c63dd4b72389218ac5f1a1225e474 +size 42690 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1203eadfa4fbcc838e17fb42e983e809fbdfd64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f727b4e244bd9b59e7e84d03ca8e75d829e9de1cb0cacd74e62c5663e94a26 +size 142437 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7616ada2007ea7a14506a704346c51643db47b99 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1a2b909fef6642d9a7567cf5677d846f309b63fdc4f70b8da789aa671fd65b +size 70755 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32c50e26b8cb354334f25c576dc9e2405c99f652 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a91eaccdf60e2478548e2ee2b12c9b45616e5eef0c30fb517c7eade7177d57 +size 161226 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e551b590f314f2e1096bab9bfd75c1ad46eb670 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a472044dafd4dd50aa8921162d25c502b245020eb0529bb9782e6c7a006cf9 +size 208502 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b86644227afd2ec457992c9553cfb8af40a4c0d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2d5cb135c8c8900fc4c33d6767cf8825af869c80b87c1000b2e3609713d454 +size 49328 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..270da6ff6817ac9d54865122b3449ce42f531a52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:396b172a6f9fe00f8256623cb70a9388ddbb33230bbab2c75d8b0acabe2313a2 +size 31192 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea378e723558c42f1c68140642ea80df9688c26f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6777f8b30a001186c468620e975f1776df490f0d6aec5603f8cd0f89fad3ad2 +size 35682 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86c2037ad4aa79db9ab7ec264cfe7fcdc7dd3bdd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2815f4d720a66e0b44f35a3ec710d34746ef4e9e91d9946b322b1524f33389a9 +size 29200 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..766b81a03d5731ae8c43eb7526a10d6bf98697a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81184b9509ef3797a25bb1eef0d34ddb9481bc85918130341976e28d19c432eb +size 39885 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da557130156c7dfa9f044173dc90cb3b9b34d537 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e67b6d11c794ee4ec2970c39853a520ee8ff9709a1b60bfeef1ce6c4263e80 +size 26817 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3cfbf0265599aad8e9e961d251aaf375f7e26f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c3e4036844d551a954798af64f59fdf57fa07cfbae0dd85741fdbaaf05d1e1 +size 21386 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc3971860a747d9a30604b2f8f8b92e692262e6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c147b5c3a683f55dd6dc800863d66f5f39371c0b35e2c54a95b230a94b8dfba0 +size 56741 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52e1f461d2d6bf5dee5bd02131015854cec88523 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b71876de10c5375bec3809565d32a8a8876336e74753ce63bd42945a6a08aa +size 21973 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31bf0769e2535b7239ec3ab4bddf43f3d97d4581 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc8941bfbf721e07ad03614461e8cc5f8b46b4950dc895e0a47046075a93de33 +size 167293 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88a59d2c34e67d50366b16d46866fdb671e925f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23dccadefb71e44d5a983b5f5c3d5fdad28aa0368d3854a5940fd32e72cbb05b +size 89322 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8448098cfbc04610b02c597aa15f2cc6a931eb9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce3e2067f0b5d4964ebe8e841b08a3bceea73323315195092b36753a333d476e +size 153854 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a108e6ecf9d3bb86befb3e5d04f2145015762132 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c3a8d405b35dfa67ccf9301b3874d4c6343d52e96fc800880c017f4ab0ef5d6 +size 78623 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8aacbdc10d4be45ceca3c78c6914a8e89e38886 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c79966e3906d8a7ad586e4529c67c4787a3d6ef523698051b22fe8424b4baa24 +size 73519 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6fbd7efb0b45e2ac0edbfe1439db25737397389b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d2eeacc3f0c24778b7641a34dad41f999fffdcf8ce955cf439cfd18941b780e +size 81679 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c25a559dfc981e904cea1650e8ebd06a9eea9380 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e504a4754052e35659bf65e9f5269659036ff5a5bb162dd7d3c6f2f5c00a1ac +size 89352 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09304b8c89a8be8802856891af8782a92b97cf52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57427f21ba76716675385ed332aad5f107b7111b059d055a5a63f8735e06d80 +size 1000362 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e19b999383ace9db563db0f14962e673974db2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f0e8d3c972c0061ee81554bd30257deaa9f93cb9fda40a6bf6464bb0939255 +size 132139 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f992d9510a4f00f4c0ca22c07c0570034167bbdb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9db316f8ea6b78102b16c3896cd53c3171ac4738f907f5f4df6da464a46a4c +size 181263 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..505bb05beca2387233de39d3d2b149560d67b651 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22926dce07cccd9c7dc973b1e322cb4bae76d7a2fe95b1c2c6889b93beda03f4 +size 27212 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8e8ef7a65962e487240f245989ac87641cdb4f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff155abd29bf8edacd9584be63b347f87920fc6d42daa9a8a4b54ef9fcc6d08 +size 119566 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba6664861b1dc915dc56546b2aaef1dadec37c78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c202e9be10399459d73eb9a7bb9d983d1e3b374bf39fa496e7bf08061e31afe +size 57447 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d735a69603a1c8ee6688020f5d92f0e0c8446bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150cc15e9ae9aab639a24aab6cf0feb5178702d7493ac9e0b70523ead5516583 +size 25144 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69c460cd99ea8300d9678c55ddac9c6641334e9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643e39b5fa36850aa65fa9a4277d32dbc07d4f812db2dea616bab4ca290c8614 +size 38964 diff --git a/eval-results/mmlu/0/ckpt_006/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_006/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a2a01c2cb337f9975fa8ca5244957079ce1c218 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c008baba15603393c1d1a8d921a67e8d96ff6c62f8de2e7c8e327dfa31d1a1c +size 32568 diff --git a/eval-results/mmlu/0/ckpt_006/results.json.tar.gz b/eval-results/mmlu/0/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb86888c0cbca9b12a1939195c5e30597a675c36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d153fe042fc81aa96483692ebff8bc6e85aae89080bccbea52cc102bfbae5cfd +size 7589 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ec49f92cd69121430a987230f1f776002fa3777 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967605ccc5b65564b9bada361aaea4646aa9ca8836959fdbd9e4e6e055528375 +size 17024 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d11ea2480b6e7238639c8d3add94d5640a906d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf849ca1eca3c916f6681515edfaf9e14dc3f73c11fdeecb36f75864ee4b3c5 +size 29440 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d90097e0c199e71b7c4c3dbf3385af877391926e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0f6e35281878d3309fad0e278f320c013dadcc32c32d24d54f54694b204cdc +size 39421 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa4a0a0baf6b1e228c61d1ba85ad5ebd8c6de245 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1c8e3b8affcd6d64bf051d915b55a5e17531812c13bb6a01781b935d637b71 +size 26424 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e9535917f7c0fc756dc86e0e1a0dcad27817070 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3bf3af625a544aa14d884ec9c9cc37473768aeb95b51165ae84e011be5f889d +size 60436 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15834a13be6660f6420f9b9ccfec4569885fdcd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8087304450fe3d5c8147d4efc60a9aa0e9c848a8f620248e72c12314d35492a7 +size 39926 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfa7a9e595bef69be2ed4ae40776c93ba48f5aca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af69f465b6c2d7d9ac417bfe49f9a1c3b9311bc60d5faa4380dda8170800942f +size 23592 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a08c0d2b1572ef2b83912c9826c03349c2df42ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a32cf001ab123b00dff90a13bee87b142cffd4f5c6b106f99078ad1708570be +size 30888 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0ffcb1f6163a2855ee748490f1b5292159e7ae4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb1d38ca765226cc9d6559aa7846a6094fbafe344a02839dbca696524968135 +size 22806 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e78fd9ed3ba75c0667ff5d580676ecea09fc9b10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e0696192a6e437728d89e754af7b46c27a6795e3aae491a9675d0179fdf1fd +size 60303 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90bd640d7ef7b8237ad6147efd928af1cbd4d8aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4b9fee1a5d9200e4a5b8015b250a13d8c92edcbc27ff40dfc1c28ddd244ca0 +size 25531 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55851070c12aa7a1373ed126093d2d634a82bb52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2c7e09220fae16952c86f540dd9febfe19213647f25e08f6e4a1e06c906856 +size 25466 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..938fa8ebeffceedef2cf6c9e7304e4880e97fe58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d83fe5f8ea39f076da343ca965cc37c07e25067e5001d9f9f6627151d243a4 +size 45846 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99f3bd76327d7c6ac4677ae1d9a9e39ca6bcef78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e293ded265e8c91e587f3906e18ebe5e46ccbe5dc89cdd53d1c2283600f0b1c2 +size 31258 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..003b1824c07a486f17d4926229ab4e11307428c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce9739be7eb4a9607bf64c5c86e48d7d8a7b5c76e07c21dbbbf8aa6cf77fe96 +size 28430 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a5f43bd75b3bdc25dc7e75610fc525fbf6f6071 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b7b95598baee13e44a4feab02889db493067843a0306a5c8f8722d51f366d9c +size 73949 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fd4819a2eb3536fdbbd606e7903812a27558517 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a0ae762c729bb7b1b35695fd4e41d2456fd554d282eb8a431684744390890b +size 29951 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..159b76cc6a216a17cf8e878eb4803d8a3fe080d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313c7a2a60685bcfc9c8d29c025c6b31c65ce0dffc2fa0a1de360c53abd4e1d3 +size 18909 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13c0c2e94ef5cd0fc8f725a5cb909243e99c2e34 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38716ba3c98d44df6e2aecfd78cf0d8755036c287b6aa0e3ff8fe9a03205aca8 +size 87053 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..235d7050072dc4e88de1e367abe7da5102009e04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88528e87939e00a710259d1b3af24dad87580f08add6af65651a9cbd49d1b77e +size 49509 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff8dc5e7a8534e798a7971b83a044ec9f6e3e167 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1f6be6040c441078cc6722c7e943637b61a03b3f21328781cc683d6cff3322 +size 31376 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61455312517e25d4c93b99ebca2245a5a6a4177b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:211f224d337eac217049723d3c20eacc38ddf13385c37321d9ca0685c6588b54 +size 144489 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d4710cf843f517e0df4752ae786a679615c542a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2200140cf022a5a89b10a52b2f51c3af5da7f0de4ac39020e9911ad65d5657 +size 43975 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ce408f9aa4b40f89c59186aad009934c05d0121 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a260ddee526e11f65ab862e2e81ab853a6623999dab7eab08e6d0ae35bd7320b +size 53763 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d20763943ad1ca3aaf2219ef3709a09472ca08b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b86bac3b2834b7de6a7e261e7da9142b59131a6214f3f8af2958018732acaba +size 91713 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7cc615725424799c01fe797b83983fc8b05e68be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb1217510198afd044c9cf4001e545ea81aef7e16b66bc220b455e7d80f549e +size 56866 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8dfcc3f0b958657c21834ed700631ba0cc1d2a6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4149bd4eb763c77e667c05f688645e3ab491dab72d669bf459aa7a1a364eb532 +size 57724 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5410373be9b98c8bb1d8576706d6141af21f81e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df45e6d8269115831d1dd8813157cea2b06e749c4b2b38048a7681579895b40 +size 42583 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0482aac8d5e5102243ce76957dd5f4c7e6a1de15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aea92609cdc0a5bc7f69007e88e08d37e9cffa409b49de2a95ee4f7f902e1ca +size 142308 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c49ba8285216ec132f85d0a3c7bab798e0f3ae73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d93d0b8d2209785c2cf0529cd0723648e88861fd21518a4e289a04032dc2a67 +size 70653 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30fac12f85bba82994f47d280405df7e6a240e23 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500e7362b1ed66a6462bb2ea773dafd6edeb41017fc1b1d3ee8d389dcf8c127c +size 160987 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a48e7a33a85d81f26c424414d1f855d25db0aa3d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b63a9824872c7a74af6b35c52dd4e9728d22e9f0a6d4e3e3465f818489c40b +size 208155 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3647a587d0a0bdaf9a4e23041703925f6dc442a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e845995af7ff7bc4685d4938c05ead0e808d450654fb08acd9e799171bb75f9f +size 49228 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9d580db4e30961df54023c80eec8eadcdff305d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1814df5d1eeaea10cff5d38c7cc822298de6938369fb648cd9ea358019b8b9a4 +size 31144 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..392b89a7aa50ff922f5db18ac3b36a6e80a30dfc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e09ad7257d3486b698cb7694285442fb9b6a0c9e42611853bba46cae18ef10d4 +size 35644 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..330717e210585c26b96256e19e92c3114258974f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c9b1ceb07e51343eb6318e6cb541170317bdc5f7c3685a517a59b4c0f3fc64 +size 29152 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..688a1a6b25cdb08e317d612f17af6a20bfc56af5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2479cd819ecacc78d2a83699e9c8bc8331b0bdeb5981e2a167f66e02edabf410 +size 39833 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95b9f0f6bbff29735a4e9f82a98af7b1ce77dc81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d12aab388a033b7390fd09b40e9b7a29b3225152e1a4c03ee5f9ceb5f086434 +size 26779 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdf428f7b5b55d00ecbe6db7d05c9dc5e756d244 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fa5b7cbc78d67c31145569dec84cb1f135643651d2a0a2edac551ab5bb3ef97 +size 21388 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fff918c3965631c67d16c1e90524b3a1425babed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:865db1ecc78490d0d551a5c54e292c41d41a047ede774736e273a4548189d2ce +size 56687 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4910a9f795177bf3b8d6d531eba697212cee8f7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e104e98891ca57d5aae37918ee8e37c1d861fc74ee8e02f8f2b95a86b54f210 +size 21972 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe3bce56f163b41fe380b67a64f1c8057910c34e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6150bf5d19d584248b12723cf5c89f7fa1342d1057f118dccc568889b4bf69 +size 167212 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9dd37150c9430f7f9a2a05fa4e550a99465719f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f1b7af2aa0b44065eb35f8cf8ec332e34bc98a2ef190afd2edf1dcb3f60ae9 +size 89207 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9463c1025711af1513d230b605b5731ed0491b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13fad3ad814f7fd4d1679ec9f83aa060928f59a4eef9de971669c8fb4d79c95e +size 153505 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40054f146c4dbcac7196a3ac34f4e8b013cc7749 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3bcd9271bbc00269105d574cfae104cc7a3e30304bcd96016c4110e8a11a055 +size 78524 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45d7d6e3f98f3ae93d350ff6776de2884734a219 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26472125eff8fd5aa92035af4157e48118a24c33948353aa6bf349aeb7e67ccc +size 73476 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d75eaa7523162a97f3a8aa503570398c4aa6bf11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170bec89416ee6fc7bd3ec1eacd67682e8764c50759959da24dfb59e175e762c +size 81554 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2d909527e892b24b0721f4b265823ef99868189 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc20f64665b51015495dc126b26ba827b83a08159d129d6d810e6b717f4e991 +size 89217 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcd9e843115181d621ceb45f837975a8e2b9299c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:737ae53ceec1ffcb2380e3045d6e36d8976c4a8691ec3bdd6ddbfdefb6fc6cb5 +size 999066 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..439c41e140f6b9cf15290e4188902085f12a8823 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc57853d4ac4fd73377b9d949e73cf9a16c5adc8c65ae608f2e15dde8c214a1 +size 131921 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83426b7946fc8e1a8543a21d74141f24ef533445 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120e00ddf5874739086caf9607da03fa1599a96453fc06877f331151c5eee656 +size 181037 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b73d68b9a589ebaa2da36ede7b49dd3981242a16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0a230b2ff7258319d6fcc83c4d94579ad436369213995740c1d92525adb1055 +size 27200 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f04a7fbee437e5eaa1ef4c26080480423ddc0e8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e8af5e08c1125f251eebab511495a17a88ed907f9adaa16e2562a025cffd27 +size 119468 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4c889337eb07fbb51e44dc200512ae12c843a1d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c39ee50b23e439937e59885831f144b53d81fabd7ac2d467cb2b5f5a0990e6d1 +size 57358 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1147523ce1bcc8a7e8a3e7cc705da47d2cb61ba9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6a8241e686c0c588dee000da8d970beddca15f705ce830a0c366340d12f9fd0 +size 25065 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc9998043ac6cc8df099e42da6760aad09738e42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75a8f029d3bd5a27119d65f17c01dd24aac76674a8f375cc4090b9e144fa6b5 +size 38964 diff --git a/eval-results/mmlu/0/ckpt_009/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_009/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74f30f5cb8ee7a0ab714275dd6640a899a201463 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77bfe8f47ae0cb8a25ba8857c88612e2cffe4d3dc4bd7dc0c76307f418faad00 +size 32470 diff --git a/eval-results/mmlu/0/ckpt_009/results.json.tar.gz b/eval-results/mmlu/0/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2e7ad94e199f86f086fdfc1d63e5bee7cbfdf47 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764d2010b9011b884b2528d18b12d0c79294736fe0c94bb10ebfe8c37b10c469 +size 7626 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04e69739eb2816034e2d8afd37123a3ffceec90c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3417ab3f285ea59ecd4b18f7016fe80aa145a462c46abd956575add218ad51be +size 16952 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc5ca299083a3ddc2bfef628f2f5d8505756cf6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb8d1d9d762ee62c24305a39476e1d50ef0e7e4c92a7c5857c4eeb7b31f34b84 +size 29428 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a499eaf0417ba4b711847db66b1fc205d85bcb66 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2214539e33fe987761f8784e7abad43c485092edbfb8e128c4f7e5a7fa7e855c +size 39443 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c541c6c75925a1f213f581bc04614a90784c209 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbe3639a1a2f652294c18a98bf41d51c822ee2caf4e71a726b5e7c4520a5d02a +size 26525 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7756cf9881ce830b1f1f56b7d3df3248cbfaa48 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d5762ae6ebff71de8b1a8dd6bd9ab2227786ec61b64e4786f5b4d7541d72cf +size 60437 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a48c60581586dc1ab280b07c3575cfcbb3a21bd3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f91ce5f3c60800250dcc624e2c60989a5c83366417f1ffc0bf4efb6701838018 +size 39925 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..959d436d2545eaed31befc7fa75deed681a53e38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:941b8062f3f917395774678bddd69c8005b27ecc08a980952e189b5fc639d413 +size 23558 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6af5859b8e3e5da25db1c2d782b147908b9b0888 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9391b86ac2a9bc9fe716139ba70871904ebde81b15d1cd852898d145f4686d +size 30889 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7eeff448270197303846739f137762c679c21d44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781a18cc832343bcbaaea98f80ca80868b66a27a975ef530f9780d31eaa4cb52 +size 22812 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1eb8cb2c8eed51462d8c2a8aa320b5711aec36df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9beb2f8d746cddaca5cbded42ce5cfe1dc4b3546350f8350dc6defd593a2158 +size 60358 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef19994854e682e705e6d06e1139edb965843b70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d26f553029f2af5484f3e3d15b0074e273f0d4c2e16947d272a058c25f2ddfde +size 25534 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9684325c11af85a95f44e3932850b379fd530fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2d6bcd8a9349ac110f8b4286f6d1d48abb2e403f8b173cd6856ca3abed1323 +size 25464 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79d6301a59b839742c3ee80254ba776fd44d17c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932bd56db1db07daba1dff2b92798e32e62dae63b0c9f13d61bf0b3d37c8ebae +size 45851 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d51073eedbc39616c4f80aa93082f5e6dcb64131 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e7156f710bd19e8754679d2f78b404a5a2b0c0d7afa1425148b33334269eefd +size 31239 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be52356b7a05c4de3cff57782480c0d843b725f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4297cd278ee54f5f7167e935d7279beaf60479c30dcc829ebab90028e8d9608d +size 28455 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3451047e040fd3491f5ce8ee899bba62ff2f383 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e16fabee9a9052e276cc7ad92c20f1de1adcb2154c0c5e06ecda4691eaf6069 +size 73830 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b02072336cf4724e1fa88dbb1917c88daaa9c6ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c033c866e5b03e95325207f875b61fecc46f8f5c16125bfa3cadfd0e5363b0e +size 29908 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a510f6ae4d8c0f7bbf0aae3a6629a62f6fd5110 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d684277aacf48ec0b42a17c88360eaa72f689c5d7b93f466ec4b9033673776b3 +size 18931 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51376b6d8d10a3e3ca68f6a52f71f89b828cbba0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa5d35dec674a0ee87f5ff1f0d3be8d1077504818b92fa1c9aefc7c7cce86b9 +size 87067 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54931b61c1e8700858f59105924b64f3047530ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3800765a70ac2c3bac9a89a0eb15877cd312dffab58784ba7d2e4926bb2cc3e2 +size 49518 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..119fa76f6dfb646b601d0ba5c1719756a12a2a60 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd04b0df82ab86ee0ab76d26f61ab4e0d60f02ca1c2fd2a3f89a1e27f538808 +size 31319 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84eb4c27b9fe55b62203da095020bd3005e6a547 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7bb1b61310fcfb361ffd4c362ad9d1e1ff41ef4dcc2ffc1fdcb0f1018e790b +size 144740 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3b72dc2b09a8d0c0aea18b237b6d1f1ebf9e714 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9df837d9884e03f0a94762ede7f5827a60a8581f455ff016f53191811eb1d62a +size 44052 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc6750570bf31924a44277665de243f6a0b645f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c43a033efb4be34c03f4392987f03bee2ad95d95a9ec81b995835362dcefa9c +size 53812 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..760ca098b52d6459266f3194a954dac21b592376 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6694314c4546307c2bc8d3eb81120bd276842433496cf0e75713cf23486a8f03 +size 91654 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43fbd6c28b584ca5c7d945249ba69995a897469f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10be8670121de636c25a61770213749615e0f2de6c9a87005a4e2e44c8058c53 +size 56793 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..483e6c6d0fa8b15fc484b0e49438184fde3b075f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:904f8e63d066f539312ab881cd620ac03dfa6cf87e85f4af431cdddfddb2df0e +size 57759 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fc745b32fd8361a7ebe714e38bb8973d75bd8e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d516a28e8cffbd5d6ac9c60e73903acef2194bf1b07b0c64f080f7bea12da6e7 +size 42595 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33395f904e2c9a04ab5885070017624af874790a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdcfa6a3f0ee9b66a79280daf2048c02666e458091a368d931479a33e15e8d31 +size 142271 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aedc6aaaf429ac83c3ec1e4e863ab19f02a2ec9b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac327bb11c2873a5eda9cd83fdcae41a2fa86566f3b0cbd3b303340fb18e29fe +size 70673 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da1a4623cd082902ad0174fb0ad495c401273ff3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8d3856d500792188ee94e3f49ac7ac69e4e8f83b734a040c406737ca076a80 +size 161181 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0cf2c223e8cc3dfc2d3f10713c7b1eb9b518269 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1603dc413b4e979a5bf9c3f1664eaa185f823e2e6e207a20ac99bfcb53b7afa0 +size 208541 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13b55133b54c89e6eb27204735d179a9c436db56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908d3de0f250eeca17a7b96d1a97af91cc9a2cc26f8795ac490fbe155b08a1d1 +size 49303 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7fd3efee402395549a44586abf9b1421ffdccd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2c13682eb56793f5b285eed75e55bada9a9a74532094295781986db09b22df +size 31138 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be0f994d4142037be365f8f36e9d26adfdbf3108 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418cd8da1f5b359ae52f9826b17615c41277234c9aec360fcd209956ac7d7fd4 +size 35719 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f46203d89295f86cd4a6b9652e6e4fc5a7ebe0af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9fe378028969d1a55cb60ac582859d5ff56b044b837977362833deb62c534de +size 29163 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..733e1752f9b6bb52406d00c5c2bd46e3b51da5b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea32930eb3bfc2bb5104e6316fae3343b1bd82d334e280bd5b0ebd7cd23a101 +size 39894 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fb74e25b4db8f4880867e286811f8240be950b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1342e4105dc18570519b394fba280ca4f4c99f1339ea742200ae40c2ebffc5 +size 26812 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f230b4a4895477294909dcee2c361da06cab3fad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b723c91a1d945b9d05d288e86cbd8f9f83b9ae4cc5cc05506e98b2ad8b7ca5 +size 21423 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..036cc8fcdc2a0d5710481e62efd34f06c6c0f2f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e44f923d417bd68e588519a665b5a8115895b84da722dfaf4f908559739206 +size 56731 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20246c0fb63aaa8fa249feef2a308a9d151c6c04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10df90f18dbe4e6867d2af12ce3bbba03d0587616bdc4b499e49a758f987486b +size 21942 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40c1a046713fe59b139251015fd97c6ac80279ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f3cfbd4d412cd9ddd45bc14a983e9afb01d0ad62bb03f1e3ed79d70f2dd2532 +size 167153 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7e6a7611f3625dc8a043297f5a9dab85c02485d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf71d53bc8218d87df35c249152d104a08580462cc12e0feb05c68e7fa8fe2b7 +size 89269 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e70f208b6e82d2e95e60407d446932c91fb7aaec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b620bf7d352ad41c19f146f161b88621694065e2c8ec16701e9c0f43b55256a +size 153336 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0276ae881a7e154de20c048af33a1532c9ab6c99 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84da137d86a9a38115a8ee35f5bfdfc960bb26ba74c300b3e988b449cc53e3ab +size 78559 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2938fea61bc995908de5c1d30bc49680e2787889 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:638a61be3599b5d7f6c37b33574c49441ae0acf45405280127837d3ebe879cfb +size 73375 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c72168b670e581abab4461d1c0284c4e2bd32d2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3779af6e0e60411d74a6b77b6f78cd6c691e479a3c526654ba03158cb77461 +size 81608 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56930d038600449d3c309f0619541df853a65871 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e749d71a6fe30101bddd58dac22df7c3baef9da318b333e63b29b675966c3d +size 89278 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02bfb248812001ffc44a63d7596f97b65d03c8ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380dc8e28c1fc9c822abdf556e8706c2aee3ee4a37769e9ddd4afa8c72328788 +size 1000635 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b15c241db762d39d16c1c043c4fcb747cf59cf87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473f74384bfa61b03b04e9fc09da3c5ae60cfb3478738cc1abe1b0cab67d728f +size 132067 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ab1270b43792eb74da02a50b502bdcb29210a54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:966e642ea2075ed2bccaf17fc64bb3ad81a453bcf596e2169a2f9e9d99be8225 +size 181114 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..895a4d6e89111f0fa3e8978c7e983f8fc1d9c3bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abf0645effded980d2a4042d2207dbfe76df2b49d5f6834aa10fd0f3e321d64 +size 27219 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9684bcb8b3397a0695ec69535b0c2e69b7e4e49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a19b7cb9e2f24f93c930a708227526fb2f7a7db2464c4a00a0987221b47c213f +size 119628 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb7f31255618dfafc662c92cede8f01169ce8800 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350e67740b55dc57bdd4a45e4dcb0a983a3af424b2aefbb349bd7df263b7cf76 +size 57379 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d5fa9adab58ef077add5df046da3fc16c7ebeec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbbc681e12554b99f733ff0647a5d91a91b76494b39a1aecbabadd2b6ec13a2b +size 25084 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32bba9476962bc781f000f13cb7d09ae399d2985 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0729c3a3cdea880f9b1a9dd07294422009c44910e9adf909006680e41663852e +size 38964 diff --git a/eval-results/mmlu/0/ckpt_012/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_012/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddffa81a350944d82b2de8e16f918e359537b9ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b89616a17601b14380f89a64df294bdd8c7f2d96cc217ba9f0d6a0d91ebdf7f5 +size 32465 diff --git a/eval-results/mmlu/0/ckpt_012/results.json.tar.gz b/eval-results/mmlu/0/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9cbad55d6f3cdf6812a1b5d49604892002e1f9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659cbc313bce2c3c76b204d197d1d1b9d9040d58864936e613e0cf30acea2161 +size 7617 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1647ebce7e0a5b535eed453eeb44a771ba562e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:376fe98da0e1c478b673904723d495719433ab8872d87e7589d8644ce20ce159 +size 16964 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce3cf753e03370a7beb5d66d473288a10f798 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a74b5a9ae159a43d17a894056c98e123af952f0ed784d713fbc3e63d98699b7 +size 29428 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7952324fca1172718d373f68c72799e54fcec218 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f89a7fcaaa3342506daee987cdefed9707f78a6cf41fdeec9f67d7e8a3c3a904 +size 39435 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7d97b69c68eeb4eac7b4de150ea9e23dbf13020 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f860e4611bdd2781e84264ffbd88b493b7b9c75e61b63bb8de944da563f4988 +size 26441 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbf95ab9782b4b365d615760aae330d66f6b8d19 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ba4bfe4d5d9e354188dc1ca8b226edee891f166b1363b4847b185720c6bff2 +size 60532 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..910f191d224875adabec4d413710abefc273ee98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df18a20ed4b38e0c973669b4440311e9f69fc188b7a1df466f992bb5b2e55465 +size 39942 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccce7dc7efc039ab8bb7c58b2b22d54e28bead38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b9885e9e27b6ae28b85dfe6b96b980e6213662cebf8547bfbb93f5cc4d2b5a7 +size 23588 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1008436c654864b51b8dbdcd856e05d01ebe43b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c3e38e32e8ac26919391dfcf6d957810e30d2e80c3df3a9e67dcef3b7cffd5 +size 30866 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6789e34f9bf3f137a2a07266505eb0cb75e72b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35d6a5ce8f1c1b4b257cf773d6b94a685f3cc4d4fb539148560a29ee933bcd15 +size 22846 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cc70cbc9d7496bdb328ed9496b61dab1bcd53db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ac785e7050a445f8473826a03670186d0db60c21b60ff9a08aa5b2bd8f050e +size 60379 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27b553f6e210563cb6139c7e88ec15b8574ebc29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec4f18ff58a2fc1757b06fd3d7956da0adf6a149cb9148cbe27b986610489728 +size 25567 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d88900bfd22e85001115dc0920ea2d00df754291 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef90a61d00881b7bbcc5ae4c9478192c8421407e7e4c01d4e16e15c869397d9f +size 25505 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bbada5a25c038a77f8e73d8fd6dd7e178b4457e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75752ded92ab9ac3725f36b68c8146b124d90cc0dc2b06e49af8c0d8a0388b89 +size 45897 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a3f512e9457f580fa4dcbcfb9d3ebf5b924b324 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:657263fd1c87bef3b6bc74151bd6504be1ca311c93df41f4c0ded5b8dac1e165 +size 31232 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..337632991827cfc9d78d86b97977a3a20b2c9638 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ec062f853fd46f37f66c0c69efdafbd4549c8c26a3255281081cf99a3269654 +size 28430 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce02fc47a9f6e13deaf3d49a13b0ebb60e4f783e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a24635971b4185627552bb2513dad01cd18a7badaebd59523d339d473923311 +size 73941 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80144a7f99c85ca26d25804323529bbee478a907 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a33a8b373ca2497d63f3cf437224edffad4b7f0a93f45f7a6459f5a0d057be69 +size 29920 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a20594cb366e44d6a16a4e7901ea9d81a713af2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63586c6e9da53656dfaeee795144e1515019cf080d001fcabe3fcb7cf1f6a748 +size 18898 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b02994a4d5d968827a382ec6d5b4d1c4434eb652 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0804e389e0bd787e5f526282848a235fb07517f22c1a0ac0aea815dde9aad582 +size 87101 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..474e408e91af54953ed6eb58c35adac9a27fdbbb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffe8bcdc7728936d4c53094dc7e712ecdc6d19fc2ffa599a02557baf1912d37 +size 49492 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1e00ed6704ccd3c52e9d982c7d5ec208583cf7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7130a7d9808c80e6236a0870549e1ececcb6fe66770cab21500a4fb760362361 +size 31337 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..faef26719b7e115fd8fa483015529336a13e52d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b77cd181c5eccad01761509188b2ee087eefe40c1e678e8541fdfb41e1ede4 +size 144543 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3afa8bc3c69dc22253841f4e5780fc349e59d849 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8817cd1d751bd1ae8f9420e18b5155f93bbcdb71bcee63f35c92794511ba5666 +size 43979 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cad6f4a84975430065cc42c59923311f39d8232a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a9baa9658bef98fb02d9b39d88808d152d6dc139aa4e016d077d38577b023e +size 53769 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f54e380c13e6e5a3c6b1caab641369965cc0527f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85de728d62a6c01689f8fd0fd9301a08c45cb38bc096f3e874573771cc095564 +size 91698 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51edd42f44845450c75c1c6acac81ac8c760144a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5870b00ee85583c83b789525eb23b794f0db5d5adb201a47ddb3be07fb3733b +size 56797 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56c2c980e21212dd737b90a78eb1897db12c147b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7f0aac2983e3630af64902e577d2ab8207c3ac6b18389a24db94a704c3a6fc +size 57858 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0017b5e6e8c2911e7eeb02345516c0b43e1244c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:168b0de990ae2e4d7ec4bdd8912ce3aef92b5ec64be6e1e4cc38855f75f7d159 +size 42645 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a3aa06d4f4cc4b2a22d78419a732c64b982acda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb04bae0061cb77b5f3966057203ee2834dc6f45d8ba5d6fdbc8d86a8a38743f +size 142292 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3334062292e65fd56e222da6f52fad414feafa34 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be1067419de7a506798fa8e83577ea0911a0459b75e4ed7e3a78c66f6402647 +size 70687 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d12d1c3a21333196ea7526061566311951ba71aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b1ae494d529f15874454d6682f526fa510fec831ec61f3da77044e0190553f +size 161131 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e304040ce2748154c38728f79c3c5d8e00202673 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9cc0fe7ca4835787cbbf96aa3b10f36a48b25b41496426996c58e2e29617780 +size 208264 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b841664cbcdea27241173d16d0ea2de842f06b98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1334d8667690f062acaa468b52dad50ae6e42d9c54c7ccbca47210b2aeae3a98 +size 49226 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3419827707350c611071f8db2603c36b6f019cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ada5a51d6d2d1ec92735091e815cb4241d7e8442eeb59f1c32b534ffe94919 +size 31178 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db52e31c80bd1d3109db57ce5cece4654c6b879a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5a1b607eb41b1c7865ac3166a56dbe2d92ae37c653b6ca7b735ff090bf4b76 +size 35672 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e81787c695cac0722ca8ed5b5bb24adaa607a200 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4be88d391fe36dbd4678d11dd8b4433b789dbe19aecee6464cf5e12f972756b +size 29172 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebb9cef2fff901741d68a5d5ca441438453b81ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f09cb37b26e03b8ccb107c7960a727b73d06bff53e813c57ae33609ebb624e +size 39875 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c178991730da8686f5b7d903d99830b45e402ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f97a5a50bb24f138c6dce7c36ec56857558ce90805a2c911ccb67c5f3e2f61 +size 26818 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c50754819f44206ce6e50e58948eda00516bd2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187885b8a1f55af51a76eb591a3eb2d4d66fb3bfaad3eca44497357c8ced07ac +size 21381 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecd0db412e6c8281bd6131ac855be38b0a84ecc5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b195e41b8aca6a5b79429e64a56f8ac13a3abb06f11f6554f146e0a3cfded6 +size 56776 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0969d3e342fd51e0d6b3df2d44ddf21bd7784221 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c92d09c858a5ee6019d34870432aeaab8dccf64e7ea8202cbf32fc4830c677b +size 21987 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52841247935fe319b14e39b879f1a6f9d5097e42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad01eecf2e35ab917d3b3c69d212b9d234e40428aa061ef92c8ed85e69a4aea0 +size 167224 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..850538d9cbea514a975fd519874d9b84d1f4c6e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f6ca8cca1f1a76e72c8ff8170bf620f1093d7247f18357d9baabbf64d0a063 +size 89298 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27817d5199d2359283bdd302500a226817d0461f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bd3802ae7ce7e1fbfc3214b2848c2389ce5de149844baf4595cf48e629f5d1 +size 153475 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64e4c5a34315fd678ed76cfd5b0fab57dfe4eaf3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe221f43e24bda95df8f8fd8a8d37edf730feb169abfb36604fa93bb0a899d3 +size 78600 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fc4144236947aa15dfba775edbba7752e8aff79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f70f0aeab1487f3cb50e3fd9aec9b219e1623911c5c07c388f0388cecc16ea5 +size 73549 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c335dcd3aed36938d51873093044a35919c982b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97ff22d3df18a551a5c53e303c62e6a8137de72c7cd5cf8cba2498b2d1cd0bf +size 81607 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c90abc727f3a09d36034a7778a4c3276dd0e7669 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d844a0fa22a60ad72f3f55fc7606cb37aa78c6a47e64b1ebd5520a5e7ec280a2 +size 89287 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c782f1657e7239547531b1ce8ad0c6e22f83c1ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0910b3b4e8a0e79d39af073b68af8ebffc5d7d486600e16c4aa1d4358d067c93 +size 999453 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fefe52d645968fb823b1bd36eb7b25d341862e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4425443da6d1b804a58627d23e43ea88dc3c116ab1b92686a54c72e719d8c173 +size 132153 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fce2ccdfad78b907d6efca3e045303204efab8ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29cded50370e2e02cea70139858a2b057a26986f78bf91e51c2bfda3fccb99a +size 181146 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33d2a716e3563e20d8150f7484e242bc1306bac2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808575f86f4dfd739bf60a201053d363d34953735a031241a2efb6e2e13b9c09 +size 27201 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e9830fb35ab805348a8f39a3e25a8e062197516 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5252bd7ce249c2d562ca4aaf9b935c3614eb0a3fbeac3ba01fa6a63955aebe +size 119582 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..343ba17adf9d6bd839c9a7cd37811d86bc082ffc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b5070423b66caac6008e793aa91b3045c74cc38b27e6274693a1cf55cc9a9d +size 57418 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edb96d93465a5574bfc6ddff1d7875a3dd664b1c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c3a8bd38f759eea78eb844a9cb7c4364f63f0c8d47f01e0b3668aabedbab9a3 +size 25118 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..029ec6175a85a9a7b02389eaaafe20337b6c5ed9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67864b57864c7a5980340a8f03a1444c24d4066ce7defb7795127c31d424ed22 +size 38994 diff --git a/eval-results/mmlu/0/ckpt_015/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_015/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..247373f67b24449559c3ff9e68b76e23c8d07b29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6a510549ac5ea1dad291e2b4d68c4dc3c7521970433db785ab2a9ea8dd934d9 +size 32515 diff --git a/eval-results/mmlu/0/ckpt_015/results.json.tar.gz b/eval-results/mmlu/0/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68229aa9d371b83d11138078e57576e46f2e844f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec60b8ff1bb5e47a053f0e83dc98510ccc57a8fc7acce461a58cac9a4c78aa6 +size 7585 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6042df9b76c257c86d84cc183e10be51d1910812 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b4e6113bbeffa6f76e36868249798a13a5e263a058088de27fdd8479165bcaf +size 16946 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d71b80619796137c45e1fe219394e9dc0b664199 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b9b130c535a60bcb4c80ca683923b419f9e7e2bdffd0599820bac92710d5db +size 29443 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e8a359de14ee7d6ae1fa09090ec8fbe3d743d8a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4ae15623beb2aaea058bbaae435299b7e0d700941f211c8e23b2447a87b244a +size 39405 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abfb228de50caf08e999b3abc627e6ebcab00f30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee43fe22ff3fbba87011563ae07fe228a59afa8a192a0943e58614722ed6776 +size 26402 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f28dccd6161465bac1ce238c7c228f974828cbe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f17812c929609be74ccb8f3915ae2c773ce1130bd5b6a8cfafc9e54a130c37f3 +size 60514 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c25b3a19927996031ede73bbbde5245bfdb3d5b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98605e314f057715475548a55c29e65113843bb0f556953695dceaaf38a0d31 +size 39954 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac16e6bbe5536d5f8c8b2c479c848f8ec64180f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:989a080c5b7fabbb3f2e56be54e3d4ecee86b57bffdd6380e0ac181a035bc6b7 +size 23582 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e6dfd9b409c7a6c0d4fc986a9eb3e275465495e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d8f99c1b549a917214fd46c1e566e233a0ac57a5c411ab620cbebf2ab35265 +size 30929 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3508d88336d52bef67c4dc091d79bb7a014bba7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5cf8151dbb73e9a18cb80a6cfe2f988c514529f188bfdb31b00f34ef1d46e4 +size 22854 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fe143b862e47135c70c2f99d8b38935dc08acfd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31875c99610d1d912e28d7f0f52bde6e84035d3d547590401c05f4627657e369 +size 60342 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3624ab6c80300b93269ab14571d0f68d12cafe3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bbbeed1179e95178243f1cf9add270d54eca8ee6098b3b45fc05a9272d0997 +size 25519 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64e2fe7d23a4af05a2437b3c645875018c2bc21d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b2ac32365a05b24ede52a4aaa98818e12e8d7e7904314fcc6465e32fda9ce5 +size 25489 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8c78d324e95adbfcd76b57b33e9a94e9ce947e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac30ad80faae1b2904499c105ec5a39353e3e150473f6dcd233992e8eeefcb3 +size 45873 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa7d43e6a46e89cddb55e7e33e7f7b0a71504bdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bbee8fc36d90aa4311d7c897fc11a7239e4409fba769a430d3d2bb09b83bad3 +size 31212 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1af9cdd0b537418ecaa7e2d66e4fd2cf849473af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5df081fd9b2b098fcf61c37c6377fb895d2714bfccbae3a4491b823e68bb787 +size 28435 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d7ffa70431fdaf931ae080a71806ecd2b15ea49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba99dce97b2ff9742ba3f5d3f987f8d41cd5c6c8e2743fdd5dddd1d5066b83e +size 73988 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fa494113f36dd4fcba27a82575d5741dd54739a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a2bc9691be9089badf9877702d4a82a1a0c4d9120491d76d3b8a21d5b1485f4 +size 29925 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..882b62cdafec9336b1d2d24a76e259b1800b6f2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839ee8c35609011bb8de46444ec95e0cb013157fed1484cf2ed52d957f5a823c +size 18879 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..146d9aab78b2a8657ebd0d30b5239cbdf9184930 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21657b404b0fb87adcac39c777079e5b1a0bc20905c857b9a180c2bb494b6c8c +size 87095 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..196b0b14ee812589de7b971ac933b93ea8d14665 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad89d8244151582b81001dc9c443b1c8f551fdc9a1c95132aa3fca153cdf12c1 +size 49482 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..152473b70a62294aea3fba541ba8a7f06d52d6f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7eabf1737ad6a30f87cb851a138b926c036a557bda61de4ed70298f8b651f26 +size 31327 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08aad30575f601714f09de67fb18b473445aaf8d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bad1666edb832ad8aa9abb72f27573c65337b5f161544c7b288ddb8e8247c6fd +size 144423 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..770274bb64c6b89a33243531c91393146a723953 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4809dcc81528074035b8873083ab0b7511832c3a826aff7cb5f68aba1c51c71 +size 44023 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e29df31db89d21f857f9ec6df1559a0de7c642a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7ce5a88bf7c2efa2e54f3c961cdb5826eebae0a58441cf763ea0a9ba441318 +size 53799 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b664adf4e37070e83cd5459136189f509a4336e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e962c8ea2a955750315b0d552f39c08309d8e834b423448ee451d65ee0848b57 +size 91741 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e478e811170ef136043ad4978a93de7374d314c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f013ea7e4dd2ca5b1f998fad529c208c2d8f35bb9c8e51595481cfef12061d +size 56789 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1233f674d51823fbf2e3d312be19c5a14a99c0db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ce698a453622ee895f0f11fc4b18924b008b7924b09d4417decf65fb9eaed5a +size 57803 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cfa0fe5c2e2804a033fc476a1569a2fa1f6df31 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54229d04c005a5047d0a43b4f5cf08fb264ea349ed4fb20d307db0b4249da0a5 +size 42577 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53278da87ed5e049e65de6bce95946b5011ff643 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f434a63fa252d7b31e62f2a83d289e2f791ea36a4f3a6a73bbe11322727486 +size 142332 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..313488919f5a9b7203b8a57ac8fee562d82aa246 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c794f149781a471a16b30a5d506ae6b5fa51f064a529f65b99e06914c92a4b57 +size 70624 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64da2ea08c9dc3d0f4656dbaf02fe1fcdc843b23 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9142b0e86332d23b10ea524ad2c96ac44d2ad0af581a5dbb977d5927b8bad655 +size 161007 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b22d6984ac74c56ccf654a10560667a244cea13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a46a00dc9158c0cf97a210ae7ac72bb81ca66a7d363b4e4cf0c8a02be877a18 +size 208170 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a477bbe3558f482de816aeabe476f77ac5ab7bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0fbedbb7e91cf0529c2bc7100fb1efc21d0f530874d69a5914d6cb6f29c6be +size 49230 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f062fd53859dc498954b6bc44bd484c950545f95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d7b798aa047f5926d7f3d8324470b2e4864d7f2b6b7965c6374cd970d8dc73 +size 31167 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9cc781ba8324bbf421353871962ee50a15c0277 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670a3448569cad476a756d353a6fccfc23fc3774ba26d1df7b7f64d138a5550d +size 35672 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bdad2fbc7d1ebbd54a102f98713590b13d3df2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c96809afa678086be33ed76f2fca633338f9d266a3d76f13c3df6ac920712d +size 29198 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fbb05959bb66482ac83bee9668c35052d3d2d1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85c16e4f2a310e42a4d77b55e9904be6e6c95002b7f28fad61755a36d078fbe +size 39886 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18bee5746c745972fe83b80cbe541a976558c7b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae0657d7ea1d79c396a3d884cc1295fcdee80e6239f32af23e54529d3be494e +size 26797 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27158eea12dd9242c707ed9e0c046bffdb0c242d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb83823dc175c826f604958579e7dd927ef48a1481dcba35c5a295a8acfb83e8 +size 21403 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6db34eddef6a1a93841f6d283fa07f89c281cdaa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5e2f3c6d74ef58233ab12ef788494182e2813b5d759e6b08a391da7943d3cf +size 56737 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7123b1e239a16a7746adb2a242d867b00a768676 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a2bbef3bd5adcd45c72771796f2ced02b9c18e8da6e5145e7066c26f8e8b12 +size 21995 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10c73d0edeb37a32fbe5f317411865059c8dcc1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42425a49b2af6646e79224305a0bb3d3bfecc8e8a50fd8d202874673d6eb560 +size 167225 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b9e731a0e7a01f400a22d03a4de4499939db9d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f64bb12b892583a9aa2e971f3ad0d9fd596ff994abe7620e639dfd97dcad358 +size 89247 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1c43ba89b1486b143bc3340b45514d62dca1613 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11de43aedf5c18bf180cee45075352f94e5787ae141fb01e5607b62f7365911 +size 153525 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e6260e343d17192931b7376724da408285c0ba3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a585c304520e5f3b2af7a88f271c366cf2d7bf5548508b231a043096628c7d +size 78528 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..004b7274579c308a5c6784a77d734445758b4e77 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ece5eed065fc9df23b6ee9a0deb619e27ff20756daf0b135c55b3bfc8fde5f3 +size 73510 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5162165cd2b77672d1122b63d6a181a6c347ad0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35419362575718009c9d14bf5d4a6237c495252f79709956cf68a2e908fc8614 +size 81617 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d5304284f25947ca4351f9c14a79382d764310d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6eb6addf41307d3949320a9d4bfd81d9b53b2b774b6a6ad83b3ad560bed60ea +size 89209 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47f5a68a687ac6465247602d83af8760443a776e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6633f3d3a5a8969a7972a1884fda3c021481f117a477a6c4d0cf4f3044702a +size 999445 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..306ce4c2f43cd7a683ca36ca2e67924d53629b42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77aa1028cffbc27e1ae8a715c1e461d5f4df193b8060f3baa4331e8ed7546e47 +size 132066 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4185d90238492b7508ac49453ccd8e7cdb727e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3982974dc30784a0b2cf51d7b3e2d9357d627fa655374bdbd430017e6717209a +size 181089 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4801eb4a3ffcc5f82afaa6917530f28733decd2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f4d81b9fcb598ad762605d077fce79c38c9b86c7b5a69057a342ec2bc32c3f +size 27231 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5bc05b9c34f01e386a18d6657648de080e63853 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20d8dcc603224653933d69b1487af51bf61b506e8cb26800693069cb132daf7f +size 119575 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..132cd105d5f465d689a6320e4a10f08219dbd0e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dcfe3a1ec7c107151c3cc2f44ada338a76a202d562bd46b1af171236115e98c +size 57349 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f08875d748f2ec6e4291be3a38be16df15bce64d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd59af4aed5ac1c812657621b233fbd96cd8bb20b3ef5d9c63040f4bcdb03dea +size 25043 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0106b69caa0c3eb8665069d5c9fc15f12340dede --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c6129f12f21ddd6eec39890fd51a452dc1d088b4e11f43b30e2252ef7c125d +size 38968 diff --git a/eval-results/mmlu/0/ckpt_018/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_018/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c24c3b03a7b3b97639804ebd5c15ee8da7dc04d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a29790d88ed2f0bb0232aa5fc97e6903c9fc70f15201f567e9700ae008ad36 +size 32512 diff --git a/eval-results/mmlu/0/ckpt_018/results.json.tar.gz b/eval-results/mmlu/0/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17b28b52f271dcf617b74d3e7f422aef8386700c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b70dcecc332d105d45be2c8f8e6ed8df63baf8e509ad8350655e7cd0149330 +size 7605 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69aa7dcd3157488a864688a04a4d872796e922fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e765bb37dbbc80114b8fec971a476701297afb742a7f14afd4b2a5b1cc0b58b +size 16942 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8767e1843591b249a873e9d906a89bf24e44cf02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4cce38eab29edb1ceb495349c6ef2f1326585a5b89f941d9b5e035307f97a5 +size 29432 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dda862c70c25382310671788a66d63b43301e228 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e0d14c04ce539db59fcfb726d0f16b275e04fc7273fe5b413ea3ebd6d28a1dc +size 39407 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8eb84903c5262f9bf12c40287e3b017dfc5ca55b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2b8275a76ddbe4222a73aa432cca5d7a9f3625e1812c8274dd3bf3c043aba2 +size 26445 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6da44f65d05a17430017a972b28d925d53e39650 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612f5b8a147e74f707d07ac3f5aea7953bfe8690dc90750c5f60bc5dac5e41ff +size 60445 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae7f334cdcada3e09a85a544f4b4d9ca939e21f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6765c54c354d89ab18fefe7cd28f6aadf1a4ca2ae9a975d07ebda267fd66eaa6 +size 39890 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f7e0ee8a6d7d8a6f25038749c57efdd3a80300c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9841b8866cf20423a2aa9449004bf25bb46aa97c5d0330fc158dc272f40a67d6 +size 23574 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2cd50e25ba328e5a4cb4bfecb8e0c966dbecb6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d0151aa6d3b532c67d368d071f1c5f3bbe829dc3bafcebb699547bb36da906f +size 30894 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6bc6fc193f1cd59f75bf7461d2d76a499f64c90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82014e654147d5683600ef4b9864bf5698926da846ffa514f14b6614591ad2f0 +size 22818 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c13f5e8b3d67945858c57c2832c573f2bafc7a9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96deb5cb5f92ac1d4835841eaf95dc90a57565badfca38d930a89d61aeabc957 +size 60361 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8520e2a94d5a6f54252976d848bd50cd1d8633f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7f0878178ed01de9d5b3fb1c6bbeca4c743f9e53932f32a4c787e29b36fc50 +size 25504 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fdd1373c3c2029aab54d104889b60f203c1cf10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52718819050b729ecc1da14e53338ea6a53f53fd686b13d6b41cc8baaa7190ae +size 25470 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77f1e80460113dc44c2ce1a211946fde0f704dfb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a0b7119220da9ad76ffc861919c3ee4c9d6bb7d849363a3cfb55a7fc913ad0c +size 45868 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db552b6e0d082ed5fd60be1b1f5c063d86ca739b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ddf13211024a14f73de1fd0b35a65eed9984e8e040b393ac6bab8cccf4fa5a +size 31226 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1f3ad57dfc71a95dc21713dccdff8dc51324a56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7c469e42ad5315510d6c024e5e835ee87637a7fd1e6a5e944bad2ea712b44 +size 28378 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b285f371bdaff11ac13297d8be1928b9513c93d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cce89461ea81504946d2fd14999c0dfd1b5b0dca91b8114580b659e69fc3b8c +size 73923 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..342287c6142e9881431019b652faff68574e31f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc937e7d47dab46ec52c7d61bb726af404ab98954787528d96896c9e7936a67b +size 29922 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..158284911ba2d959b2b2af79956baa72821d8145 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae34d9c8a9c0d35e0eeedd1d10e243df676644de53565c3b33324d0e88d8ca7 +size 18891 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6276340c2bf7af8418766fdcb22d553de178b499 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f2c6670c77d90584841277c9e049c6d96c0b6e3820517f9cb83ae5656fa3514 +size 87067 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc5de0ba70b220b83ef79e20c1aebf32a0386301 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e926cef23aaefaeaf88f30c53ad04086939f53413b686e1fe71a11546c46a5f +size 49443 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8acdea3eeb93c91fb530cc270e106368a3890eaa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4920215789872f82e0b963112c89f0f1e82e9d493cb8be726367cc10faf86169 +size 31309 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aee8d7064324c8a93e3cdddc3b9faa03075b017e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4504ab73081c9c9147fed31282cafdbde067527f6372d891a80302b2c5b930d +size 144471 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f8e3aad86aae3d527fb4fe5e06a3a3e97a8e458 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f449e640e14da991d2f649d0c9d5cd8e3d51750ddcc8e644156a00d058dcf2 +size 43998 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38ed5b915ee9546b284a158d3a14e1e43d12be34 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea3b7c953457ae5b6fdde71adfac0662be810258d2c2dd7bdad5689370521ca +size 53813 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0649055d1470ca186fe3b9e82213ac847ffcb967 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae7bb119ff46791907571fc22b083253f13799967aebe055a2ecb7a82ec2f5a1 +size 91741 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86b0f36b591c90b8e6a92ba2e9c3800b92f3aeb5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dee007a35ce9a73922391bd114cf1ef229dfeb169fe508899f015ad44b9aa78 +size 56820 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43ba7eba764efc8a7cd3bd2d00595ed41e1baa89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a768b32ca31cbc865a81df8124ff6ae04cbb5f2d329e32f9189c7d2064afb949 +size 57783 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2d758281389bbaebfdc037abcf9a31ed87375fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346a528a6610fa1a3dff42d49a4c7b15ba99ba509421354f859cdb7456d519da +size 42555 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cad7d67a2102871aeca2d982cca8158a04c2b0e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b84f191cd9a12ea2d314762706e81f7211bdcb7acf49a9453a8ab9c9fd2c6651 +size 142311 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba941a02cf0b8741518a1707469fd8aa42ce386d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de6ed89cc825157c72f0411489ed78ed92902f98feb5654f491e6c0c13f9f6f7 +size 70634 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a15f672b06368c64b909a102d655d72e470dd4d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ef3b238d20d655761b01891a8207d23a88e06185c090555822cccd0ef11ef65 +size 161069 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89782f1256a917bca939d2273365453c49a0336d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e613fe1181fe99cbca8467a5c4a88ddc584dab0bdb0fc95c4d2ec021113966 +size 208217 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a36057805a1d192ac377f6144b0eb7dbb739cda7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c649ba44c4ddda61c94ef1c95983f3c32bd38df89e5927ed40274f178b44a34b +size 49314 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ed7c8db20d02fea11b698ebfb593f7e39599853 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78646ba3699f9a53979d7e2db849f4be229beeda153f6ec5570e13fc1168b7a5 +size 31142 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44fda05a3491c2a63209dc4fbf3ff1e93e309618 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0baa929e482522fba011a4a6bdef3ba1add781b075a818e64a360440dcd6ab +size 35618 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66128df27b0386c81259389b1953de593de002e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41eb552cc7349f42e03b1af40ebf74b8de4df899e75f5f4cc98e5904f815a414 +size 29183 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91b4f60829cf9f05746835c69b652b23ec613aeb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5deb9f5e9a246911c18e496a77169cc849ea5ca98aacf0c73307e414a05134e5 +size 39809 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5c5d98b51b9ba2a9570ee3284763788595add64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af4074a283f121525a850afd3c372cc005032792dc1feea2f87ff96ba873faf1 +size 26765 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e833180b90c124789e75b62ace2ef55f8e18b796 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb0fec2cb1848b9ac3e42d4c4570b8cdfa153dad4ef61c29db15d92aa2a71580 +size 21407 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd073b7266458d49dd4de3555e59490e2d8ecfc9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957e1c050fedd88e68a223c6ef0cc690f9ab8878213f1d20e7fa6d8627bcf5e0 +size 56699 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39ef38dea5213ef0c956b2049ab5bf9b4ffcc74f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d608267f99b18bed117ac548a4c00b47aa56385977cc9b6fd701f0045526fc +size 21981 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f653b457dd47d5aa451118dc652fe9f054208c4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45214f58b279b6d675b49f0440a2612ccd47be0ac21f910402f5ca92550f0b8a +size 167199 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01abbe04d16405c2662e489fd4dc696eb7f624d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b17b8ceaa91aa15f69bb412dfb04676889ef3919d71ef4aa15fe035165ae43 +size 89240 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52f5d0755f9c7adc64c60bed74263e040fd1a7fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83d2572496eb6e7e6489a644acdc5c24cc7c63745b2b6d865c9958b754a5f82c +size 153190 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb073a442e553ef4495466680952cd5da920dcdf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a4284f78559ef07b091940b85e6d291eb298af54c62770c55f7803cf0f064e +size 78520 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f3066d0d9907f0d34b2c49869dbc9d393bcda13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64873e734026cc17ac866d30c624ca65556cc3a7a3e7ebc42ad72988a39c9405 +size 73470 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4dc3d73ed8d3a7f990b3a2760a98caf68c7331d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c642d12be31f8350968d5b0d1fbe5f30efcfc8d8844276ab5651aa716a70a11b +size 81639 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9707f941fbaf8e5af4bdcdd8ac318bcda4f609fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be81d07de18216a1b16a2f7f2597c602ad5b325bb435a7afa5c731c0440f8134 +size 89219 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b3c24b2a7553899afcedfe71d2088babf8c1709 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3fbf899a4fdd1bbb9c19258959d838e7b3f3ea42a8cd3a2a02b4046554eff5 +size 999170 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b2632959acf57f392ed75639dfb13a2967a2472 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881109dc3140cf9d32fc76588e1fd59eaa4326cddbba288d297783324a926e9a +size 131898 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25c0ddeec3c701e030cc80bc619b65023449bfcc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20d16c52901a7011bf426317625b17060129e12b6554e4ac1eda18d31511867c +size 181070 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c18cde9e436e66bd0eec26133c69bbd94771684b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f2e82f30d2d814a2325a5f18801d0811e725a2b5591423d26895a7fb62fdee +size 27216 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c67b6f100714618d1c0c4f7f65b2ca6d9cd9266 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e9bc60ba0dc048aa3cd105c45543a837a345858671c0ec8301b5cc443590d0 +size 119489 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a89eaab45b8fccac74643220a58fe8f71e02aec2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:855c16a277bf04fe526a40711949a158148677e9d2bf184f6636c03589d5698c +size 57397 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79ff1f7d24d2b565de35f6ae5f559869aabf4220 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c5c98e3b766d7a5a05b73eac1a45fe7fd66b45bf34c7ccc6a0de49229b16b0 +size 25098 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3156ec3e5fa86b92532a27d0a78bbc0c1d5c1e0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43bfb7ba7717ce08d12902434529e1456fd6f60eae4caff022d0eb3b8d15a59 +size 38985 diff --git a/eval-results/mmlu/0/ckpt_021/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_021/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1aeb46e2229dcf8eae41f8eeb50589427de9f572 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b0a2e99bb719c7bbf2ef2f08426a4113c9417fe7d6b49ae38435a7b2a7d9fb +size 32519 diff --git a/eval-results/mmlu/0/ckpt_021/results.json.tar.gz b/eval-results/mmlu/0/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..843390a4a3c85a96f9cfc8f93ff51dbc822cc99a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311f4696555270eaf3615f4910906314fde8dbd5f5d302c3b74fec35db900cc7 +size 7621 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a468dd13f2dd8c62dcd9c6bbf467386324581e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d577214283f02507fefd2d9352ef1fc8dbd1f4e0746cc98c1b3495138c9a9d5 +size 16954 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9caf01d2fe8cd68d5a108fce659ce5d79c347082 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd6d631fd76f14d2bb487fbed23ad46bffb42c329b7e506b5850889a0c9cfa46 +size 29389 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31c77d97c38da4711b04b4822cf31e0dc4e90756 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c1fd4e4ece23a8776aa8bfe41c6db025d681334f915bc3be84750cd38dbb3c5 +size 39406 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..687f39a12915fa20c9d0f26e398b12219816e252 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8fce6d48235b491a76b640ca2b6a493abc0f6e244956c6782308ca79edb16da +size 26471 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e318762b10208745c59a0d5bf9bb7b9fb423af0a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:840c7ab757403c4f4840ba82ccb3adf875e8637b8d422577d4f42e2152582480 +size 60413 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36304ade543dcba680b73c5d6937241f7baaea41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c579a843c4721beadecec4baa386d49c6838d68f07ac8e18ecc93e9b6f07d0a +size 39954 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..536e6d329fa769ff182f1b669f0d8ebf7383c6e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c9b72601936c7fc2c9a34542a3acfd19f346bb0f29f65c95e3dd99caed3b1e +size 23591 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cf0ae27987bf639448ddfb67546014d36f6b493 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea6dd5955af8105b690b691181ce036e45f14fae7cc419604117bc7a741a496 +size 30921 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be843c4ee2579830901c7aeb3d79125516471204 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a348be6b12f261f498e7a2a16d253d7f27d4f62481548bfaeb5d2e3bcc53ff59 +size 22867 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55260853edaf23c5f3763a2c9168be44417be8e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac45d4baf4e84ec9cd869a8dcdedf37172e561f6ce6883fbedfbeecf5618196 +size 60399 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ae4a2271070d392c3e398ab95715bd2c22d4f88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed87826bcb0dfb27f9eae100b8c28ce81388aab0114f063cf21ef7dad8e6d2e0 +size 25561 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49de1d2bf74ce755e70bf95091a23d2e7991fce6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42f3e8ac26871d7c4aa8bb1df71c2fc16229c31713e13a31423c9889e8ea2df +size 25489 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb6f7ae69cc6da8dd05fe3699a9dfd9c882b6dff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4915a440e02ba118bd03f220930b655dd71c7a2c47eccf4b2b3b6c01606b3a8a +size 45845 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..619ba9e161648d980e97a738f403eeef1d38e69b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0430729f889edd67597bbb4cfd31dfba56195abf0a8c630e77c981bae95303 +size 31277 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a79d546bbd22c734a739ac382798ad94adc776c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f936c529959f4feecb587249e126c5da916ed359b3f8a4e3e3d96cc6468dc19 +size 28436 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c58cebe93ae0c51ccb6e04fb9693f3823e1fbe22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4421c5b7f46330043cbb403c141e2037b63b79a173be9f3ae7e99dc8c2df7a +size 74052 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29d4803d8bc7d99d8a9f9e04248bb2c6d70dd5d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fe1e76f28a8f4358e12a9707fcd4484a641e06fa872fd56312c951109ccc4a1 +size 29981 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2489957a919236ae6624683ff6e6534480d051b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196dea3ce6c9ee546d8cdd108ec2b37b17ee920dca989df4a12d73a47c6d05a7 +size 18956 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0181ae096c679f57718c63c607edad7f1e1bcc10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f4ff721eb40251b4c6bcdacdd4ce0700fc2bf6fb89a14f1fee1f675b50e248 +size 86995 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71b152a9cf08b9535301c7b1c2ebaeb42c23894a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88133b742efb491ca62ac51001b5fd12de6727dc5cec7ce685fa18c1ad8540cf +size 49523 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..395a472f2d71a5355d8612c210c6cfdc53143be5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85261cbfe60be1765f9cfab6c11d0d6757ee96e32f2ed422afe90a27b5409bbd +size 31376 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb769c4d7b224260283b7f67c2630c3d70ddbd63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65e7cccb2acbb9e1ae9d6c43e0b8334e6376c76c39e39ef2125b70f43e59d1a +size 144565 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c87ecb454610ad6780f1710e06b911febd3d719 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ddddb2116ef6b089033069f0a1871e3849b4aa39b3e30210e74ee6dc1b08b0c +size 43982 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d82350588e68f04587fbd4646b56a0613330b819 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53331598e724438c88a9d5dff816fb722e89b77add2ae7e4d38c3f47dc634c84 +size 53767 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a3adb8dd80c38047a484ffa928bccafa3391c08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90863d5331575264edfb5c2cbca2564bcbeff8e768e834d27ae96e9535ae5d99 +size 91811 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20f71ff13ed4c5e7f38f52f83e69c5ad86d1b6ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7beb9c8e6905930aa77a1d1f0b3a0be32226afbe91a93e359398ddf45d2a931c +size 56923 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f9e4b9c96b3308950d5aed13d74d8ff6d76fec0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ea50705b8b80c59ea3c894c9904ef74c3c22bd6844d62fbb30c4ae79980445 +size 57859 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7adf796dad8839dc68ede96ff7eabd480f643490 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d44470febac822020385df2e9da15c60b0f3445d378d695485ad562b402ce08 +size 42635 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1da1ed8e4893a0e5efa56d007d8c48c24fae1d7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f693c0a8b0d5f64e1e73d35830d71e781f64c87d3afdaf318a3e6944cddf40 +size 142134 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..589dd4cf5c8cd4f1de29d6710fd37cbfcbf5bdac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfae4274590fabb6fafd5e97388611e112c42838011aa05073be9f58ddc1d06b +size 70733 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afbf3bd0424c57560676d2f985bcfa8856d55a25 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d98448592a0caf549bc560a3133d9b21bb1cdf34efc9a9da1a01298c36948fd +size 161142 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6f916e859c5377ed8ce0230525c7980af95ecb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e89cf6300b4de0629eac1a632bcee8c50eefe3bc163016934b16c0513234b +size 208493 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70c4606208ec42d4e0f352e7ad0bacaccfcbab15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce1ca7c891e08ac04643231c893c49f80a3bb4ac0f43121e4ed80e8ca63afacb +size 49254 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c88e4a2c3bf925c4b1509d114df78c44ddf9d2dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d59c266992387dc38e58a68b73fb51e00216b9e25f7b2ed9c945edb09e1b7d +size 31133 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9bda2779aae15380d86e5209394153ffd186006b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9bc6d4f388cc64d9de099391865fb83d7a1c14ea74dbfe7e0522979f024511 +size 35665 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6b0b353b7849738da9bfd656f667bfdf5c3f91f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c46b4b7c8d614c9960bd7f0e49f59967a7c27934f216c434a0691722eed45e +size 29205 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f0ae088fb3fa3b252616351c8372d3172656c6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a9deff34a881a6f7873b038c45e0b921c1abc87e42def45635178600ddb2c6 +size 39865 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1624c9cbcb15e83754a0fb738502fa0fea395d27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4174a82678a284df7e383663c4804805d32ead295ee4a3b3a14d1d246da26dec +size 26817 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c42928147d2325919d3d2a600e47702cb39a834d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad41699eb78d51bcb74fdce9f2834abce25a5d0db40031443dbbaed7512e277 +size 21426 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e37ef9c968c1209eea9b49359228dae1e6e210a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62687c16a0a101fe98ca73ccb6c9bb590a45623926bbd3afdd34da769149f685 +size 56736 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f1028a7888e5056520c90e6b23ffc56add8c3cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f4b7f8a74202bb47829010c12e666b1f8afd8c4c6332c1c62d3e03c965643d +size 21949 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fb7133fab627b8b0294064fe000f03cb126dc10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc74f4f15af64745d824b8c005796b2a58cd12103e8ce5055f42b76e8c34d29 +size 167135 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a8ac2820140ca86c19bfe3aa8e18f679edc2f3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67321c580e2c802df1487a5b60c65049caaaedbe3faa2644217fc378b95142d0 +size 89306 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79671bc78ca7aabb952b11fa583105612066ba2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1eeda814af2ed1b25aa679847dbebc925d16cdeacc4eda60d1b4c5419321ba2 +size 153320 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9799ffc19c9a63ffaf0a3b61f92bd6ee5ad42e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3febecd1fed03501fee18df02ef2bf4b2bcd61e807f7254c1927ec0fb95cea +size 78570 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6787ca1eb0e4439817db2022bbcd4e39b0b9335 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bff85e8039c9dea106a977c4f40318f245811bb597adf4c38b052ec15493570 +size 73569 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0db0e2b295328dac9889aac9fbfd942582d9a28e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910aad05ffdaf4b568099153969c8cdc9d6580c48fa0a7a2366049078e8e523c +size 81561 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cacb2bbd8392533fdcfa087e03c94467aebdacb8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3cdf956d4bd3d6b8d04e6344cde19f0613c67995730f5206e64e66278a76651 +size 89375 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76838c000d28098f07d1f4b7bdd086bf302ad692 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57afcebe58acaf13a5cb1f9e7b8ebd04a59639557ab2fa029f7c8da3888135fe +size 1000007 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3f53f22c973283d1d518c7d4c67ab20e3f965ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52100a9fd761b1579cead26f9610401fe967a6478aa0edeeb5fdb6c2c2ca866b +size 131997 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31b447d44780c9a42152772db8c373ffaf0a30d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d94464223d64d9c1779c5bf09019bfe81a8e2e0e2cce66f2d0555a15bd67a4b3 +size 181095 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5255736c8f59b9122b2787edf305b453b67eeec5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e585e5bfdea5f9dbe4ac026db61538ffb94d36c05c7b07ccb4240d6049793224 +size 27180 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b622ee4fc5500096b29e46fa21c617229a2478bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ab05cfcc41d686e69b6f618206dedfb9303f3b856efc28749a091cdbe9d6db +size 119662 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be84cddd3b1fa61e1eafec284c871c219f0c6982 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d332ee98a430b09dced0eaadde78a706398864c5461ddce3499e8324105c6a45 +size 57391 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2915bc2452d40f6f6f1ad4a6ce620ca0af94dcf7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ada0918d567f4b21cb06c3ac4d0068eb9988d3e47661034ed707d2209931f719 +size 25047 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..844bc8b6c82615bd489266c2f531af0cb4bb44d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f9e7cefa2a1f62dfd6476f14e84da0e9b176fda3bad4ecee822eeeeeb5350ba +size 38959 diff --git a/eval-results/mmlu/0/ckpt_024/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_024/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17a7a9c4aa1521e1cd95c160bbd4e2cf552fe179 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b98aae2e1aa68ff2edb3ca6549fbf8122e259d08ae2e132f39fe7146fead78f +size 32524 diff --git a/eval-results/mmlu/0/ckpt_024/results.json.tar.gz b/eval-results/mmlu/0/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..758f110433f94c5b57c2cddb1853866d7d36a7c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2faea3b97930093a942fede803964642d70300677eb65555ac51f052454c7fa +size 7599 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..938c5149f504e38763f8f10c2d8e01ffc6c662b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e19d71ffaf9254d208722d9dd09d7f6d79956047a7f00d8e1fc0fa295d1def5 +size 16931 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c2afc8eae02ab213fbd1003262b6a91fef4763b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e886c82fa69f2efee83b67bb3ddc396e5b5e383c55e10b77a4a2b99603155de +size 29453 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b50dc8da524972f6bdfea2fbab0662fa445fc57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9129fd4c486c93e687b12c4f917180f68f4969c7c495c54fc662b444dcf5eca4 +size 39426 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f4e25af9f4ba56650df8b1fb605b91fc1195488 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8390a7877908c38ccebff459051720dfde806d8f28fc3cae8a76e1409ae48b7d +size 26464 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d486972bfab7dbf821f2ea555f8c4102609f82e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9154a85f99380770eb46b089401bb83e6661cdcaab36a7dc626f7c3957ac2cbf +size 60471 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ec98215e7e3296425b198b545fafb36f075349b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe72fb4136f93c56a274a002c93e0019f27cbfcdf8863d9fe54d61e160c8d31 +size 39984 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3dfec4a9f0c815a2ec32dbf63ffb9094a73dbcfd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3a4181dc52adcf89fdd3a44913f6bf68c5d4efc1c8da9b7c03d0f37d80fcedc +size 23597 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e716a8ebb1d9a6eefad5e472f83be10b49a98847 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e113e8e33215a1a7f1c24b14ec840002ffc4adfac04213bb06a28be9955a6b17 +size 30879 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9af861996e77e0a0aa1117cabb9f44c998317cac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2cbab8f9fc995c209ce4a2652906b5b66afb15f2bee53cc03932b6ae848837 +size 22815 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11b0315b577ed403db056e8dc330fb510dd108c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b7190b6b91819a4da05050671d937b47f770ff83560c4f7c61e1813495fde7c +size 60378 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c7e5f794f7fc122c06628b9f50b007161e04d83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd38de523bc5716fb32d342ae3f08bcd87dc4a82881b450989c2df48a0028c01 +size 25505 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98a61141026972842452bbb0d77032a6a660ea8b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef9491fd2336279892a058557a36e53cae50e7808d2b1f3925b9fff72f34502 +size 25500 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ca2825a6179fc66e34aab0fe1b1e991dc7f52d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2dc46aa791a10bd1ef301a5db77dc3d6fb7c02f860cf35ba0d015bf3c20b004 +size 45865 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cddd4c000f928b8d0cc18ff30622d0ddbd79686 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898666e1693dfe58b72d446356126a904292a79f5a967b521f08f8e191cdbd90 +size 31195 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3780da0c99efa7bc4cc39428f1d9ec57f38fb0c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73433eab8744a84d25fa61ae7829e53cc8ef85a20262b8ef1f617606e0390d6 +size 28445 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98a4721212252301df0f49a4fb980d13427bc10d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b1146d117750d7a4be80588c27d22eacfab46247db5fee39a8a089a5bb5473 +size 74001 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab35706b0b11337ab53dc64b8ffa8c0afbd23b3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdefde005803e724e226827147818fc97dba3dda241e207f2fceb1c869352484 +size 29900 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f01eeb2763367fef367ba407d1a0df3f03b663f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e49b3d58adf4e30366bdb522792316b3b1fde215e821107f0c27fa67b0314a23 +size 18879 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..221ef589d1871b8082ec1f6f09540e83c913a72c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208de11c1594257c9428001916c9428b54f1357f560cf5dcf6652d89e96754e9 +size 87071 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..017ee2aafac5eca865653b42a4ee96462ba731b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f459a9fd4d66a55cd428a3b91d1e80a5d9fd3f1f9e8724d465f43b2a6b1609c2 +size 49509 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3956f92c34c696cd705789900cfa3b33c2fb4084 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53cf02f05f1f720198ac58d8e8e1ca469b99bc6879f399c58589f17625167b27 +size 31349 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..171d5c7d315c7d8b035dddd542043314b8e551d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446bd9742383737769ce8fea1b516ca2e45f6723d05fb235444f14645bcb6de8 +size 144457 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cb7c5025c1f1a3de2135d0b16dc878e18ac0126 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc826112900ebd868e2ca8f47bb6525b6a407ea910982111f309ab7645d216c +size 44051 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..733c21bdb961c394822157fbf3382cc5c2149532 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52de3e5e3801abb5c2ecde0db715e67539390c46926a211e6c8d3f5b32b11368 +size 53769 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0960839ce4532f117349b8c16dd6be467730c4eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96efc1aca55b5c84d76378c68c96df4c0a0d3bb10e2cba79aa7f64deb97c7c4e +size 91579 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35525ab86c155fefbee4c9bb94ded19b903b3551 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae32309345f07ab53b3da297e5d4544a0d1f3c284f6195312671d4162b684ac1 +size 56866 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d15d8d46ba5cca1967683440bc756c9ee9978fef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d072738c885660f2fbf86f0c66a73fcfe658f7fae81ed2a16b1802f3407113 +size 57721 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b968053663572863072a9abf878243d33282c6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19682f2f201e22c8fa2b613da1397f0773126a49fe431edd5aae6d1587d9d909 +size 42581 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19f07a8009b8ff10debf9deed9fedaaeff3921ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb66253de61f87f9f590f642eba23b97ca5e358a8c9e593bae5cb115f9fd45b +size 142364 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..917bb41bc9b1e29a14eb8c492f4c260fc190d2cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd658c0ea6d220af2f83766b920282f9aa8c4cd68f85d7ddfbe5fa18618caffe +size 70641 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..639297c7f1e728cf4c33afc38c135642a92e72e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18fcdded5cc0d2f85b8f735a68fc4c9fc95e6ec7b510c781e84e171002c6b1c6 +size 161121 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..345fa343eb046dafba336e759303d1af28ef6f0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c083f276e94c4e15fce021d87a9208ec9744f52dbfc9379e94b8a566b92e8afc +size 208301 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9b20bf8b1b6bd9ccd0274e29435e7e68deaf1eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4589678d78347c1eeaf4447fdb84e4b61389ff90ce52bf9d4795e7af1988012c +size 49301 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f45bcd781062721be839024263c46a81d13ad41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a4646d311d8d9ccb5589ef51689aeea5a27263369b2ec38b832a875988fde80 +size 31198 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f4d2ef81de3ccf5673b0c74209b4441e8ddd2d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c0d5be571fb47ae8222f07394e5c4d87850509ca4bd201dd2b8e8512c752a4 +size 35633 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27f362f647c1282540743105ada9bbe183a2a02a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b857904001e21caec1106227660acb29c2b19ba411c4858aae36478041880d2 +size 29175 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35588f3a04b7828786de85b9c81336ffaa312c52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719d335c125ed6fcb90d6c30818eafefd2610a7ceb1f7ce944060c971282b96f +size 39864 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..254aa3adcbc6e3596504da142fe8642ccce4ab4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dfcd1d64a6ba791370b3978ac6678312ae91d9563f063d4896455c3e356f9c2 +size 26780 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e57dbddeb430fc2edc6442ed6eba776f2cd5e415 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96ce83b90f4e8bd2c452c8af42934ebca0155da24d25e21f186842d8f382718c +size 21400 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30bb1350381273b6c0ee53ad1392a7664dd7d7c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db3e0d639492dc70f965e73d968e903e60b8b8be37c3177ff87b2465a049b61 +size 56754 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09ac86ab90802245bdae9b4ec0bb57c78f981fd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac801b64c6922a6153138ff9fa4f72b5ff1288b525587a1f118413a15a34fb5 +size 21978 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce2cfd249f938d7afea51cd56c04e2a5ea1f3dd0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbee431af28a85e2df328f3509096c08de4e0ccdb8b70ac94b7f3936a8157d4d +size 167419 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e57eda0355d1a1e2bfbbd244c49d58ecaf6fd8af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc631c87e840265270205999ab2ef3f984dc55775f0db0c5d01fcd544dd005e2 +size 89249 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5745922d2edee1111f2a40334ca320e55b45f16b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b035bec8232d1e489cdf671b13f048b6ae67c18064455c5a149451f7444c716 +size 153543 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a97e38e8176e7b5bea6b2b730577d1a730cefa4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d220c7016337a0fea089be3ea8188095fef53f1e200cede38ed27fc0845a7c7 +size 78559 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6389cd383df8e6624cdff51fcfb2a3d667fc3cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a35ad299695c2c9f918f825f71d8754fd1e0de8f7fa612daf20fed52ae76f48 +size 73439 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6500cc6e3252bb51cefa7a957056c6e6009c43a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa22928947119c886a52f3241a68ee050aa02f20543b0105e101c7c547e7351 +size 81717 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f098c45c087d97172c7eb6f523ba17efbfbb06e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c06d84748d2c8e33a1cb685e209568f36b461f12570a646b700bd5c876a1835 +size 89166 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a548f2a2b43af39f0f197aa2da4a25827b378e72 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:960e5b53b0159c398c2c4452d4c4b804b7b8ab366d87d776b7e88719bfc4917c +size 998994 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd3e56fdcbe1d881341976cc95c1a90e958cdf33 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264f28edd7b14c752a272e07558d2e05f4e8ca85fe79d848872c67a786b77af6 +size 131900 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6a88c0da49ea321ea22ca26f85b0cc7f66f35ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d7287c4cca0861616d3ebb4ca18bd370d132b1bc1215850f2c3245a54a2191 +size 181061 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9af63735e55376ad928eab704acd500d325f9e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc517504c06b8b945d2e863e2e298479b10e86bfcbf9e46d32a3becfe107d87 +size 27186 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9627c022fcf080a521fb6ad37e21209703871db6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9937154d43bd1b20391ef6c8054c667a81c530a760d1b76a36618734d3f5a7e +size 119473 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f920e1050b0722a0508bd8c263636e6620df4aad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ca66fc76dbbcd6d822e2c8677a8318c52219ef9598282bcf4c2f23c1bf80d3 +size 57363 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..524d7f8b53111f420e9fe57f71b83a61371c8dae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9abce2c4f8cb9a1ab16724e48393632b4757085dfa175faa052a408f50700ce8 +size 25087 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbaffa96bcbf4ec0eb16d787e8b70d50cc2df834 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ebcf3e0d1b38a5757f5594724b161dc1bf3e0526d964e2e837a06df21135afd +size 39032 diff --git a/eval-results/mmlu/0/ckpt_027/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_027/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cf5dd021c06945b164257908568f33a44442a2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a75891b70c8e150c786f5b597a25516d4aaafd3bbd866552576accb3773b5c +size 32522 diff --git a/eval-results/mmlu/0/ckpt_027/results.json.tar.gz b/eval-results/mmlu/0/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39eac327b1b4506ed50088395c1586c5b7fdd160 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc67cfab61c199573c0acb7f30cb0b445957acb16901f5a850c92d864e5b5dc0 +size 7599 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26e4600ee853a4bb637bd018a54266d5cf4e8ccc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e3d024e36bca6d6c4a269f73ea6100d6366ca21be91e100611df0f7a7b53f6 +size 16955 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f826fd20a82a87aca855944c00805ba4b15b9f4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4e7ec4db26466e99252da3956cd1e87cbb8c673b637607283dfe68f3e3d466 +size 29401 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..586960c36f307e78309947d4591502da26e6292a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20c11bc7bc08f8019cfa26c803955fadf845a43b476c3056c7f4d9e2da8ae61c +size 39459 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b4d65bec76c8e2ed573b5250e5b1885c33b5faf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae422001126d42620df2bc31049fb727b109e1c0f35e52edae6cdbebcc7e325f +size 26452 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90d782d9d654d33e3522dc9fcd36ed2487503f2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c71591f929a3bbadf977ecf5b011e6c382a384d33c9f9e76a9e1f65421a4b21 +size 60534 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0debc48307c07911b873b98a617012a566fbf7d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751f4647ee6ef5482f517acfd91458e6d9661438681b5f46299ca02581643cdb +size 39948 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4f51bccb7c7f1be8cb71a03a39b645fcd4fae6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87913ea31703099115ae87925c8db1b9d722748302ea257118909497cb5e9e99 +size 23581 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88d0be0a704e0529ce92494646e75df23d9ad3aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad45ea0b5a0451831561e70ed689847d158db525436b3d43b8c4e41c9bfca42 +size 30866 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..785b8d129d8daf019b364ab744357e5e3f55a719 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cc46309f87c52d150b46807e480a0eb3404b04892e14566495f28a7c36deacd +size 22804 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53ab2047b52d0060d037873b8167d67bc76084b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a33b6b67eca80c75036a211f273bb6eeac4194e6f0ca7f043783dc4594e507dc +size 60376 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa9aca1bb4133c86c9abf6f06f6c912e08664928 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94148784af8e813198dec567403931c7132f94498991747a0331b8487c026609 +size 25526 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f00240abb91d1786c0bb48d2a74c34a9e370c18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0524e11587df7d49d1b11216f66e7bd9a6afc422a489c7a1167cefeabeb47dd +size 25509 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5991512dac0724808c7b92d14b67d4bd75919988 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c403ee5d22f5e94a5294a9840d00ba04d70d3dc27a4f34cb31c7bacb46b8ce24 +size 45907 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..425e86bac85b242141e4f7f5f98e38ff958f64d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e5087a2ceb659647cd5b1408ba5c80f364fec369abfcfe3b1122fb3dbffa3c +size 31202 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da61dd6ff091a0bf854b4a7022dcb211ce197e11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28784bae9deb14ff8b205bd46d2691b9c85e59e66281b15cac9abe191e4f65be +size 28434 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7873a733cf3da822e0ecbe690828f5a4e8fb3186 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec2a21cf99d3d9a93f59e49baa691db15cd133b9f944a65ea166e26b2f73832b +size 74016 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16137b8583baba5fa3e7dbdc62922386fc7851cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f60b11510cb79b77bf2298314fedac56b6fe5d9a07e97a88dd6fb7e05d067f +size 29938 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e7eff9e065be17b8958669d53156e55a0609428 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39ff535bff50d664be9343f05174bb3327bdb8ae96d28f980eda5acd9f2c987 +size 18926 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca3f73a77e2c0869d20987af8c74798dafaebae3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c570ed3cb3a079a8c576c694180de1e148e1ecf9c125342ab00a00626c579927 +size 87098 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffedaf40baef6b4e774e5257d918c4fe554ba2df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36faafee41e3844c112b0745161c0d9f09da51fe59a0ccf824a34c99620a89d +size 49489 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0301eaaf8abfa65e641ad7fb2a9a4f1431e7d792 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11124265cc45efb5adf7821eafb4916f0246f291a6fe48fb8fa6d09483b662c0 +size 31315 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b6cbb78078800972a189154e2e14f2d3d867083 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dfb88147d22cadcd1b584669a2d8b39a4700d88e5f16299e4663293f30738a2 +size 144602 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a525829ca874d501855d1a6ec46e28a18d3faf6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f60b9f57ad389b7740448b0c5d1771bdd499f8233fee4583cca64daadfbaff1 +size 44034 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89f2c1e738533da928f15ac1212b1ce0338b43fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97770c8ebd3577762f8b5401f8796d45b179b2f0a27fdf7eff5726e297811316 +size 53808 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab9f46d328b949a33e36151ff156e3fe6db9c30e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b411cd1b9c43e4e3dd774c8e77ca088c0cfb098c21532ca4a774df988cb47377 +size 91624 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..180b8f56c705d403548e24d91801cd5832dbd3a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddca5fc50d5d817a2cdcd68aa1d1786cfd0a27654ec8eea1ffc2828739556b6b +size 56769 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..915bea1ca51cbdbd6a91c3861fc9e98e527f60cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600e0f2723d393946e49540f3bb08ad718b8e22a8d5a1ab13d2e8b8342c95b37 +size 57745 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26acb3462f991f351985df46c6308aed9c50757d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e596668388ad08cd1fadd277e863a59384c7b9c1a08a733370dcb534127042d +size 42598 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cbe6ec4a26626958a34533d9df081180cfd8f80 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d758c14889f682c93754a1ffcacdba61d9a623bd75779dea2c350b7a3756b82 +size 142238 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc4522c6cfcbdb2de2ab47107068ada8f7bdd719 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c575a5f0f6e39fa456bfe3289de15d6805d56dc901e1b2713bc2e84c487b031 +size 70654 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec39ae146e0602a25795e22fc8b27a3b235c05be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c66b0a6a24648964def5a653826fe0747e532d35b8c247e952c2f31d6dfe4dc8 +size 161301 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c8f75162505581616fee971c719f4016e5b3089 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2604c0ab00d6d0e7b67efc55b044ad781d87f0348b82bfdad726c9e9d5dcd547 +size 208399 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc99a747cea6e9979d6c9765f34e0c6eda3fad2f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:889a4a0961a2752f2ed2ec664aee5d71360bc71df21b4f49e0ce15fdfb154940 +size 49262 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f6dd25dc6583daac09356f5fa59a455f8b75da7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebbc44b6fd4c66bdb3270f0e50e7b642339928e63da746065f3029494e2749d0 +size 31153 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59d7286416069212e596e339f0c80e13d44e5ae6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c71db285ca7fe5871e4763c8c7184ba7bc5f82c5f6eed7f11bd502ec7725f6 +size 35661 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d8fbe73e74bfc7389872284d17441bd01d271f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba99e8af2d70789c4b97ff7abf59f287940c534b4de2c4401ed4199af5f99e6 +size 29165 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76d4b0864d72b6bd25cb974ed6586e71ff04fddf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b3939e63da37e445e6c3989fbd0829306bcdd9524aab04a1eb1b745cf7abd0 +size 39890 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7c907c5d3863be1d647c24e7b6b36eeea059c46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c93065f3c993d4668e148495122f9248e70ac626abe4c41153f44afa7cc21336 +size 26750 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e9dc5135511068ac2c74de080484194aff684d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5352336aa98278cad545b4fc8701216cbd6e1651b6cb64f8556535bb9ec82389 +size 21421 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d37a4705facb550274b909c2f7a05d121365b4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd51e59a2ab4236912de6e292adf0878f2a2f1a5ffb77ce431ba78ead8dc3614 +size 56792 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1942d3f22186f6a6bc7f8881e342c5497a77ee3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcc158d75acc38a63b2b52555adc6361d6e9514447372904ec7ec4a7b9bad17 +size 21981 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a065df4844019d9449ebdb573ff847a9fa82be93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e45307cb69dd98b2383ad8d9632d5de70a99bb83e4bcf5d10923b26be5b9a46 +size 167568 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90d447a9e7fadd62d3a1fd712d54f810a9cbdece --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de79afb30e538ffa5661e212158880069e8ed6c8f65acad9a1ce0f7a244c35e1 +size 89232 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d0c4a6cd2fa132169aeb596225db5c0bc82bcef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a709e167da000edf77308f932ca2aba5259eeead4a4cdb1b2c422e086983396c +size 153215 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0b068ea721894b9a6358549300aa98e4cbd0813 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04cdb58bafa23373962a4804119dfe30721631ed89d1d00c4bd43cf15c7f17d4 +size 78533 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62993c75c7fc942fcefaeca248a4a3beac3c7b46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e933ddbbabdb0df1cf4f355a658fca5149a37eb80b5a37135f5f269f15e8a44 +size 73540 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..203611d11650f60a3d66b750425b1a6afbc3aad1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a78632954600a3e8de7a49c7582297804a51056f04123db41e68a2d69894c9 +size 81624 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..535d08bffe490911f2eb97e8f9d78d369ae345b2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052834081584a811a44ad10c1144b5eb4eca06770052b22a4ad5f4c124d9f850 +size 89217 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..239fa69264ecc36d27d0871d8bfb67ba6afd7799 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3088bdc04b75427a2ac02500da135e6bb8dc4cb507eb76dc4a2b3290fd26516 +size 999396 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..661d1018d37e7e430b2d45e023274842aa7b0cc2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d559641e4080fae4ec6ae7e9050f620d65c9f12df541929d192eda9c3a9799c1 +size 131823 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9203943a69114acd177f390a8e4964b62a1bb224 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd29b7e0df4a6132eb9709e293f80eb623e04691e9f5dd30b451967510dca22a +size 181069 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..904ab461446a3f771009bce217be2a827fe4b71d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac29a259efef342fa888a85c13bb1564c2d2a5be1652b4689bd8df699913bc8 +size 27264 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4e1c714ba95d9bd1fcb51e230826f1f07ffc9cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28587cb4f7faed96a1f2f097160caccc63e6535ec2d142cd9800300982ac88e6 +size 119518 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9661626ca323a811569ccf2c51cd7683102f1dbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f4160affe272405bbd4921a2716e6cba3d3fa223fe677cc85437474230f5b4a +size 57404 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb94da7101c8f2c0081c456e2952bac5e5351ff7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b09f9b32507b9842cd6ad70213ad0d020e0f1770e52c22c4167636c30b42e5de +size 25129 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35ff355788c5a4c6e133681525c8ebb10b286a9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f8ede23c7ffa1a9d39251e536e4931c071f4d1d72ef80486d6a67dac270f43 +size 39007 diff --git a/eval-results/mmlu/0/ckpt_030/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_030/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9c7f734176afe6f7c414c618975c43abe89cd36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42076e380515152419d0ddf1f56b4e055c0bdf7be5d344cacd24e80ab3597716 +size 32555 diff --git a/eval-results/mmlu/0/ckpt_030/results.json.tar.gz b/eval-results/mmlu/0/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..690ab5e53508d45bd5a8dc56cfffd2f4ff2ed1a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7663289b2bd6adfd95d542287d5d519e0cae77b7544fca8c03a37fe9a504ab64 +size 7632 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16dabffa8638dd2c860d01f4ea02e107d537f6b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec29f260e53d7ca2539f001c4cca6736526e136673c2750a95e29f5f915bc64 +size 16926 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e093216929993405a075bd3987f8dbeea37621d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352c62ad680ec61707c09402cb74fbe197552819ead3482e6a49165775faae3e +size 29487 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c21ee22c36ed009bcdc6a2c8ad28264fb5cccc3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:738a6363b539f7eb7286cbf2f5c7d4a461ebe02b80bdbaa12bfac941740ff37e +size 39494 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f53b66e94a87af98c38b666ba0ecb9a3b12c348 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9497a697590b1bdd1430a4dbda2fa68de6b205738c04d89f19e2665f3507c3 +size 26494 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e46fb060d4b5a5ace39c74041f0ed78198b36fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39ad0c7a9a9031d3ce1a8e3ca96c4536d98d1262cf8ba38ae15e28a7d06cc417 +size 60580 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cf5ac6c2f2f05284602e9a573fd9f5983ae080c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194e176596c93bea4960ff4b586124e0b0b904a130db8bfd141791f3f2208ab2 +size 40010 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f7f2d0a94aaa7c845dcd28b1007e8da984ac5cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118e33d75c70eb19594c846d9e0cd3e855ba86cd8b04daa24fb00d7d24627452 +size 23605 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f144ed4a7c220834d85d43700d7af365549054ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a22c7f8992b3efa4dcf53fd750a2e982b689be82dee39d0f07027d5b13211ff +size 30905 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc9d3f4e2e79816f140c24b8e6880fa5a8ba829c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd65b120ab11ba96da6928c9ff41611ed0f16200a27d469d180afd12141128fe +size 22861 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2d9bb219b08a46d43b10fd50328a043a1cd2a2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ff787265e9b45bf2c98250cdb66c9c8d63ea2c76aeef56db13c29255c6164b +size 60426 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afcbde825b35693a73dbc29709872348a0ff73cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf7d7b169a27ac781cb0c7cea4375c695987dd3cac553d645e6cd315587742d +size 25548 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..925291551e23045de4d127a7cd2748de3b830aca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c8de7fc0f64ae5aa62fa46f7bf0f7041d56cd4e683a04536870e69ad0cf3e0 +size 25488 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..569310720ec123d7c68e279a373ccf293b33a66b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c20a288dd55aadbb948d291e5689c5b8274d8d5b3cdc77aafd8be4ad8a3738 +size 45902 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..198130647673f4846d6494eb0ee85adea2e159e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4dd7621e12151d7b2c70de04bc3f3361b1f34e18724c449bcc3ce7d1bef66c0 +size 31304 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5320151054ed38615c601ba0f1d6f7011c3349dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a258385316b945cd663a793ed9e306c49fa4fd59b1ff633cdede4ab6c8c931 +size 28481 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fff8318c3fafcdec117f10b8f8f44ad339597a3c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673356fbc7ded19ffb70efd0f275ca6f868871d934081fc3969f6eb1dc83e299 +size 73927 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..457b0bd379eb0d9b4b103d860060c1a97e5f2e88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:295a817ca2699ff084328d2d9038390e85a8a4e29345abf17cfa24b214a387e9 +size 29937 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fa6dc6f794ccb008e1ebcf026998cd6597a7bbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f47a290007322e675a1afa16d651a546ce4249e2c06d4d80d23e43307d4d06 +size 18953 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49b75630407aa45ce3bd48bfb84b1ce8bfeef790 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3944bce2de366dfbe5f67a19bb06eb79994aa5e9bba53651aaa8ee8b8151ab +size 87159 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcccd1768605452072d496f6a64eb994b9621e87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4145ae79a92ddab0761d0c83ccf5e3e7a796b9a5f1ee735acaf2a13363c30958 +size 49622 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cf67de3c6bd0691191e5d4df6e6825c8661d80f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd6d2e06a8d3a667cf6f8ec3a0436210fdbaa00ef7c898dd65ed45f6e375160e +size 31347 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32f7f330e346b4ba68f8d485121199bd1d6ec8a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10269c6852fff3e8f988276fdd95df497b5dc333c6398b25470aa696f7a91216 +size 144644 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..934455665d12dbd4a55cb96956e6dc52788accaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb23416a503271d94475940863e2669f4cd348b8e9bac406bbab16d4dc0d957 +size 44122 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4480bf8ac195c1686ea85fcbe648bbb522e51b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33880949f282a9adaa2477fa508b33f7ca07c6afffc162ef46a38f2687ed75cb +size 53879 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..383ca867e7324986fb433725fe5777380f5dcc8b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac273da88b4c8f6ef2065abe96449a11eff8c84680eea34f5863131ed43f1742 +size 91781 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6baa5b041a741fd8721c9c5db9a0c8ee0240874 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65a500b7f62b89b3132764ef7a4d0fc69fcb18ef94422c29ff8d765ee7392c96 +size 56735 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..483b268984a6dda24380cf65bc3edc854393f28f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946086762faf36ec43322342f21386f13c2abfbe98a49aa19c3adbb8b5205576 +size 57830 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e435fdccf21c6347669b6f79f4b8bba5cb60125 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca82deda0c932281cb7fac48b3cdc090fb624de68c45ac9a502ade54d99a428 +size 42596 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd1a274afd11dd440df576a54368e9d5bd5b3910 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:360f5acc4065bf9f874623970aa05a6beb200ecc007a864735749f6b8c7b517b +size 142521 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82891a468da65407089f66f643404f0a6047a996 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce1592e4fe1f1a12ab9b2db88ddf8dab89b94fdcd73669f3eb5641f11aaeeab2 +size 70736 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4994419726c6dec78aadd4f6a255521a60f0bd8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca418450281740c38c15eba14586de804c86964750b12319313d8b6d15f6d1a9 +size 161346 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb1bda973b1754189f8476de7e99b4598ec63a20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6444efdd46d21796807aa657e8a03ef927c267e733263c3fd5a6b662ec1eb42 +size 208582 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27267f49741c0a95c99d4d0f05d8f725110ae2b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25daff942bf7b94701d663ba7cb1324a7a6d4892931402fb5477c5a4e348ce2 +size 49422 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64405c5b33145fe73b105bba65a079dc14dab2bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f854a6c02858c11e96d50cde328067dcfb028e32163e8087a9f3fcb34a04b1 +size 31218 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5da1237479103b72a1310fe930f0f3a77fd19b02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116b12d534410370d7082db292f22373fde796ee418b8e8d06cb554f52815421 +size 35691 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da94d1babf628e86e727bdc256f4707c16ff83db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf562e648e7b206202fe42e28711d5187f25ff9324c87e5db2010b75f99bc18 +size 29154 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a58aba8eef937dd2cb9edc77b185c7af8754b48d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddcaeabc3beaf3e24da2b11bb536a1d4988cef5a20331ae70c66e394ae40422f +size 39910 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3adfd1f29cdde8557ccb1eb01bc1c78f112525d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5c5c8adb3b24eda085047c2cfd98c36e489cede738975da27068a17e762306 +size 26792 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02914946e241dcb7fa48561b62a78333dec4247d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a367c75de8fc5a1822cccf7bca3fc8f7a2eea569d01a23eb45f045549d1fdaf3 +size 21435 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..debdbab60dd820d349bf12745585815d7a2cb837 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e6153a57b44eeb822802a8facc3849c7a85288ae97273a2965f0a8079638a4c +size 56834 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c017909d4d4d67e2726b533228813d32f3f0503 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb71a20538488e0466de594ebd77bbf2b0ce6d1cc78ad5632acdfa01c3a70e0 +size 22010 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b8651bf6a4ac08cdf42497aa40e5b824ff8f9b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb907171fa2b9c359cd68888a61303d20760383aa5fb329d99e7b6b7b5ed86b +size 167738 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..318ee0b5f7e5fab91a5320f30da98b0448cc65dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115f574bbbffcf92d81dea5dc559b02512b9b5cb95225f34347a09ae9b942943 +size 89289 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6caac8b42a76bb812427f92f3659ca71a54bb7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:266f280df295a1f7f8394f0e23e973e86f2441e0a3e5e4013c2a35ed33456bae +size 153813 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b18651f3b94501e4bcfd3e04fd6f0c77eecabc28 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d6f44b7984c7e2b7dd3d4ac75b517a5ae1fcca5e1cf1b740a53bf02ca16982b +size 78590 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..428dc1a4004214b54824875aa42f6dcbd56488bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b13304c34a97f2724ecd6d5e09d8de596eca09721c7e3fbfbf9ec977a57bf4d9 +size 73500 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64d4d8c766a92b1e559c732b639cb54a3a5eb330 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e549787e8b649cd6261cad1e0f7f1857362f857db52c3fefab55917036fead4 +size 81753 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fc95695962c43e1c31ec355d89a9a070628917d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5cc1de88f0d0d3440811f6cba4cc84e2331e648e833cf880084484bcf65d9a +size 89248 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ae948376272f642550a467a30b494344fd78d44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2ac0f377ea86f7a9e69e304a9d2ab7d9e39059000ceaa198662f59012fed2c +size 999970 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..112853e6c04226bbd6278cab3823ba5287a84981 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d29c7fa39dbfa4caebde60bf430c3df522a91708498697d0394715d9552d60c +size 132238 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61e1220dd71b51c0e92358efe06ece7deba8650b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48170fe92edd581fedf0071a0d8e6e978f66b07eab6f462ec3b2d5b1b7c34831 +size 181361 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b26841d81ea79d3b4d0aebd25fda46a6be0d4f1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba884354dbbdbfbd424a54643e3629515690012a7f50e2a72a2b27f959b22c75 +size 27242 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1dc736f85b2f885f850b8c5bb17c0ec7aab74d3d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fc038dfb984fad6eee430a40563da4105587cf7dba2ddc3b963e737cbebdddc +size 119521 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93ce5b488448351e5def060254741acc36aee78e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b127ce629bf9e58a98d5c32c10f33e83d437bc9ac8b03f24c76e2818367ca32d +size 57366 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be82e733345fd95e47444f372f97760d25d3aa4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92db4e66222ff572092537269b31a80ad4827f3aeee61b51a6364a4de5e6def +size 25122 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5d28e178a047a8810c6f22e200b01b228d9aa0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd9eab98334c3103405170b1333ecc3a46c8e6e526bca2cecd4216423d77057 +size 38995 diff --git a/eval-results/mmlu/0/ckpt_033/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_033/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f09bc408f7e037e50e7b1ac804d52e3586c43a86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54a724c487170e38a8d0c47f2937c3240a400678f0c8d71e3e0a0a9abc98f12 +size 32598 diff --git a/eval-results/mmlu/0/ckpt_033/results.json.tar.gz b/eval-results/mmlu/0/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54f9e4af8e16a8034a4c81f8c9503b919f3ff239 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1da27285a682370f7039c42e7da5fd8d8832aadf3d6c5f98bc3c8f5c6d53d5e +size 7564 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45fb3777850e148159a9deadb597aa8cf21963ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d42f80f20ddccd9292fdfd1f7ad05fdac62919cfd6189b788b75117a47b1c87 +size 16970 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81fc486e594865bf810394c8c12f98ab83d240da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52495abb42c879683dc408391573d091c84c0093aeb2eb736793d887a038aac5 +size 29402 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a777b5cbe84dd29ad8c3dfcd6a3d01358efd17c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b60d99a18a477f363c6579e2243aa6fcd3acfebb4626d83bf5f6746d89b639 +size 39376 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7f7f7c4a085d43ee0bdfec559bb04e000cca4a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f42932fdbbf949f426e5abe0b90c142518268e40799eaeaed55f4b4e2749605b +size 26448 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02dca4f00fb495db318ec363f9389ecf49769943 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2db1e028537752ae9250c60682e33911235e2e63e3d20540e895e105b3e6571 +size 60502 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e8d12ec808389c578318a999febc1882114568b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94002b6666ee0c48a03b093cb37480891c72912682f15d665c9a1e34d829051b +size 39898 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84c648165f4c185d94384c561144f62ddc8238da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:509c4e0df0305c669f4674e2fdd8e5c8995378e638a2b66596a2c09d09e866ff +size 23550 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb10fea22b8e3c25aab1d68935146c5d035dd615 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e02c252c9ef83ca35af34f2ba6810fa4e1294c28742609f619e26af4ef46a35 +size 30866 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dffc64e60510774dbbc0feb9793edbd903b1919f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ad614fd5559608fc01230f7f5d4fa8be8bc719f6849dd0cbff0fe6a96477ac +size 22819 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f12976e6b8d6111432e1f00002739bc838341308 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58dba1e1ec0c1350ae3c6b83d6aa484334cbc4f3e1512ccb684093dbf63c89dd +size 60291 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b80ddfae46ad075deb5585f79562ed03ff2bb5b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c366c1a55bca233e8f1fc837973ef779fbec38738bfb9c8039c97723e0159f11 +size 25469 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3aedd92b1437121b45096de321174865b0c3ef6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf6c1bfa43d4e4394bb13a4d70c92b0f16d254817100c5376a3c061e589a8b62 +size 25517 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bea9c4cdd1586bf4f5a1c707f88f634af6a541e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae6869a51db613645cfbfdbcef762dccf8272773e4fca4fcd928df864712109 +size 45858 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55368a3768b1e14f4dd102bf6edb3ff86ad2d5e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1a7f6a3c63354d95578d4949bb186ca6aa2aeee4ec6cfd67c38cd0c7c36bc7 +size 31241 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44e42e323f6cf48ae2befccea14f8d6abe0fd077 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8fdc4a3ed52326ddb7d7cfba3eafe770996c5441278f83b8672cff919ae124b +size 28452 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..119fca513f86ba9f8db6355c58e2d561de1e7d55 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03520f17345e3b441a49b9053e4d03e2c6ecc0ce753f230322668b01c6c0f73b +size 73898 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b00dbcd52a417b5d11f70a6ce1b1c5712f80f2e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459e2a1e279c2be20dd781a92931a9f3b1786dd018733dc110510595484a7381 +size 29877 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6b8f49b397c490b804e0c79a74b347853f25663 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29676b608d369acc46203639954834e1c1a5c669f0141cac1f99a586fe9a7718 +size 18899 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a2bf1842eab1be37ef9098d13d93428bbc65807 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a42a7bdfb06742706ff83acb085daabb0cf60ad443bf76832b6a5d0ee4053c +size 87025 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f70b1bcc48ded367c6658293f178cec7e942cef4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf13c81320bc77c248059e38fb063115f6cbe1821ddaf6cb35fe16396f595d9 +size 49504 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cab1735cde3d6eee91582b258a00bdcb62ce7287 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b129d4991a0c981dc22dab0e7cbe40f31dcc05ff352092f257baa2e324d2ffc +size 31325 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea35a15943bce34ad1fac0d7fffa4852d00a3d24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b8d488ea7783a3f98e5fadd9b4cf1093b201443b4da65a5b49dd5514d79719 +size 144539 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc72d7a89c733f99539d717bbdbef0cf2322662c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa7306465ca0eff25cea3ba898d9fc3b40fde8a43b3e0d19a7670368a3485a6 +size 44011 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11b188e483483b8a95b3c99889f85bf5d543de9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492467283ae753459c3e2c0c26a81d17226c45ed837db54f94ae83e91eba3971 +size 53749 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9490b5ccfe2cbe8f0ff1ed346688dda34525cf2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb46062e05b6d39c6a61fc1126276793230e79963f8c3d2b5d40f51a11345f65 +size 91662 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35632fa2b96ef94ddf79e32b5cdf74edcb8ee94d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dec7422b7a2f76cfbc0b52572dc9f93fcae5746becc5b61443ab05ecf5d0e55 +size 56727 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fba6176b7e691d22a54144d8bc5735bc3e19de6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354d228138b1aceebeb21a6ad1db2900a6e7880eba8402425c4f04f02629fe06 +size 57825 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e61fad5f1b4256b116558989a11e6253396088c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c7b4df465045201118c42a4759f4167ee2c064f8db995fedbeb2f38279d946 +size 42532 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85a4235111b75ed657fb81c7ff4face2e4151d6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:034f5cef333c1b5151c551bea06afd73a17d47d079697efcf363d63d8cc063d2 +size 142230 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e12d58ae22164c57c5f8ee1260b2567a1b3e128d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c646bd9d0dd5248f7b04ed554ed298e7882b5c66c764dde2b8c4dff32887139 +size 70651 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15ffb617acb1ed02487354ef4377bb1e9523bc24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed526cb973a8caead5caf7196b8d306fbffee777c00e929c92842bd2f5da2b32 +size 160976 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5d1bb39f3bec0fe77b68ecdae415b594a2ae0d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6a54be5c64d28361d4b100bc2ddd3c35dc1a3f1f60f2a4f2e2d404ecb637cb +size 208210 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..446dc62d04452baf5505133571586d94ed5dc111 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac371c540379539a6fe15fc15224ac24fd9fe672755b4f5ffa3e744abeacacf4 +size 49269 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c25c19eaff4b555bad9d77263eac1883da41048f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0d04e2dde3f5f9246c3e84af61d24fade66270e96fad17193bbfeae7d1799c9 +size 31131 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c92926812a6642900492a7196804a93b4981c5f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a596416127cd7bc19d43f7c31b0a78aa8fe5b0f6869112f8cd335bb845da5fae +size 35672 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b4c0b6f3fdf3af0c4b1676df579c3f76ebe7512 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8caa4bdb008c723025b59f5188a6240434c0d302c88a1a2dc4c57d8661228c59 +size 29147 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31f99e6db9caa4a2fd5aa773f025d06346edca46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6692583e2e02523011931f9905ae899ab74349db1dc524c045d1c561bb1e6039 +size 39845 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae17b7194deef19d0441c939d67d2314350b1728 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22e31eaff7a0ebc804e297a8c901d05dea1b0581b7b018c24463819d5e03a130 +size 26813 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d54c2d03ea1212bf66b43ad22daea55c91d4a851 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aade3627ee03f83dc0bec15fb2bd93dc98148585bab8d95a39d3462b00fe4695 +size 21390 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14f4d342750aaf4d33eb57096262b9a3cf976bf5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c0fc96c6eb8acc7cba6eb019a0a2c7807846ee5b6953e40e4cb460b32c9afd +size 56776 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..434c7b5587e2c7f1f8e57c7a2ae87af014a938a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb9c779f21211c336d3368179258edc4cba4d28079ecbea2ced30c78410202c3 +size 21963 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47c9357ee40ce346e866583251c2f881bf521a30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c2f4cd77207f48a9e17c1742420d679e8ff30bfb0b13b672af01d0510ba480 +size 167233 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18e7b5f71ba447662aedfaa805c63c1e15ae3553 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16fc0743f47b8d26d26979dc1b886745cd1f53dc201c972cb6af11d8b567d319 +size 89218 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3cd311a4e584b8cfe648601243850525d810168 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858864f996290c5c66208f24d6b4061aad9c8d888f8b6046e2f528f0315da1a5 +size 153567 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ec9ed8721a4028c58ca3fc8297658922efa5b33 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:538901c945fb4b5ab2908c9212bb188109dd8305f9ef8d91edfcf2ced6ca84b9 +size 78539 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..628cbff6451f04bee492832b35a75c12c32e8708 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f587e4888060d58055200846c52833aab9ec70f7b8fba0fc226144b95aee40 +size 73455 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31583e24777918fa263be77b854d9e3cc28a6ec2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48f5f87f335458ac669d7a6da4f72cdf7cdd291fb67d93fc334e09734d04cdd +size 81659 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0520bcb2073155783964bae709724ff819216956 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b061b637f376d11aaa7685c9e6da7b190b46a05ab9dbbe46065b1cc3e4507f +size 89253 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..535f3e7f0132f25b7957fc1845a502e7853b91fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92bb775441fab5f568cc7ba8f0d07d3959a7ef22d23f69a224676c01fb8eee4a +size 999234 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac8a3626cabc300a9e9c67ddb2d9e74cac5e6b2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc1d55b2abc53319264e01ce779c6af4fae427c6fd819987e6f90cdc878a12b +size 131967 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b24dbdc6cb280426d4d57eee50620d63bebb6c23 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3822485dee411720930e3cd5afe1efce0d135e55928a60bece8e404441c00534 +size 181027 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..195337938a0bd78d24e2291cb52c1edfb4d02228 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c138780312cf01e3fa13b4496629b3c8e12b57fc8aecdd81f2286f6ed7965214 +size 27210 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b05e34809df25f78d817d120535936d0cdb3c40a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44202c66340a69222edab5423c3d7af024e3d326905a3c3911d4d8894619046 +size 119442 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f6cbbd7d28de867a6be624ed1743aabc4d174e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f8ca8e8e5e5dcf33cf6471cb668ce434580596ba811fb5341499fb9b997a31 +size 57416 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b7ccd384ab558a8ca452e29f79e901c2b61d725 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d21032ae61880a3cfd33b08d5f881d8f05cb6815a38c9599b5551d2e8d97dde +size 25063 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80dab6b8a027fe3ae7e89e04628d58daff2fb3ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1894d454920aaf4ca555856d87bfa1e9a0444faae44dde3da33ce2f808e409 +size 38961 diff --git a/eval-results/mmlu/0/ckpt_036/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_036/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df9996f676d61574731fd5759c1e9dd2481abe12 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fd1800755555ae2c5148551ffda0109656a9146c60b7505eaaec2d115aa1328 +size 32486 diff --git a/eval-results/mmlu/0/ckpt_036/results.json.tar.gz b/eval-results/mmlu/0/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f26f7debd3c063025ce51f52f44214168c65ec90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed7d5a21b56a347b083e0a257ac4dbe68c3248bcca29e0611b2526d10535b9f2 +size 7641 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1b0925300e47fe44b15de33f40b4a968b5b8f18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c700f43cbdb8a8f134e0eaa09e78053b5efdf056f60dce483172320802bbf8b9 +size 16992 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd91c6d9a599882aff240d22bc020ed1c110e51c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc083c2e80efd2fec24273c95b5f7c09d19ad751b4e6e2a75c32068ef21c5be +size 29449 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c226095139d70be33ce4a32eab7aa0afc82debe7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce35c4a5cf0622ceafa4c099c5fe378330ddb50b3185a9b76d2a09bfc724ef48 +size 39499 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7abed2201e580458317fdde674b6e300786cee14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343cb4a2d7951f7fefd428dd0e778a8596467536b376f48e63615136dc62ce93 +size 26515 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee9eee7fa2e7dd99ae22131f9cdc64fbd65b4c1c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd22ef730a26104bb7e92be8612c9b75a33158d3710e5abd66060a961c367715 +size 60624 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1a956c2437aa3075e763971bae390d0d4409bbf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4a17d214ce2d24af6aeda1efc618694a4801136da212f2ad360baa5fe673fd3 +size 39991 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f480c2c1f63df2aafc730e6c9a62028bc87e4b0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d198f203c083142f7b9aa6914e0c7cf62c07f9c6666725464b9842be34ee84 +size 23560 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5fad913c5d9e725542ca5a9abd34ba2a645a4cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74cf6d0ac07a88c3bc7b03c103af403e049ba94dd409c684f925cdd85c28b00 +size 30901 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b64e78ebf5e375e25008a4f458fb12d2e65df46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa8ba50888547c9b678950a378824aa85cfabd94f42829e0b0e480c89847276 +size 22837 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9defa2be15b30796ddf8ec8b7fbd186683ff89d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a4c3da6f64e518b9c994198e393346aec21ebc55b1a5230a8bb474ca2ce069 +size 60407 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3155567bc68e201e763291aaf68d1099cf9d6744 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e92f64b1df5eec7495bb46e110bf0e9be8ba14afb90701ed8623617a7f9789 +size 25478 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..228a2db4a508dd12b0fa7327cd22aea409a79803 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8efc40bf26199944c5c8d4ae3f99178a1faa3ba4f1dcca6ec74864b90be48f +size 25545 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d2eb1390009ef6fd137cde3050507086b088b13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:619cabffcf1f2a2ed1d5e0222e9138323e3e0543d0542804ec006e5534ac318e +size 45993 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5621f7fee0582cca3888df1d182462b57882595f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42c86e0a4a97429a5ea59c95ece642ff901cf08327372d925fcc3aadcd743fc +size 31294 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfa271a75da4b7ced462795226bb710df76d863a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c51e631089ee9be25cc8b90e01929a1499937c14bf845898e778516be02a8e6 +size 28481 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37fc9c3b761c71febee8fdc99dc6769d25ceb95e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d874e175441acc86e2c56f7f895283a19f3714f0e389883c9c1c8c5bb1f22c1 +size 73831 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ed3922ad0dfe9f98cc18d1b3a99da7c1e49921b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200c4b94603384bc36d1439ea88e66e3fbcb9efdbee913b86b4d3bba68e87686 +size 29949 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1d306ae709443ce2481dc6db2dce23060d0c053 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2967822da368dda8696d5e1ff92fb162f5439f90bf1f6514cd10f8039e61dd93 +size 18928 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..064a404e13a572fb0ae4dd2d5225b8bc17f2288b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165781751e453203331acb58246f33af976dae07ea4e1e55e745e9e3d060e336 +size 87186 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de3e30118d48565ea49eb35836b56f0b51cc2f94 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b355c3581cfa89c8ac77eb885220d0bc30cc944bceade8f918820cef6c1526 +size 49512 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e30c35813f30c466635c825cfbced2ac51873554 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eafd877df0356d5e50a68a88f9f1807e4ab83ebae8f8ed83ba9056c2b114389 +size 31352 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e3a90e04189e60c5a86ad78f5e8653c2da28f75 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a58a3d46c332d9dba3231fc3ccb39d75818509231bf64b041fb137ad77203a1 +size 144580 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bba73ae6555b345339fa97f3aa746cd7309296ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544ec339ad2871b791d6d62b4ced1e6a72d03f2a9f0a8eee69703915feb7352f +size 44092 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a739d6ab262eb97295aa644038bc97aca1708c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c521512d85c00074501f8fe629254776a594b44646d747ece9ab40e14fef8467 +size 53871 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e488ab4d71c3797c79bb97d5dedfe2aea1ff863f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e507a432522efe2d3ef233ceaeade7aa84604e5d4683732226923771f58225d +size 91888 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efded76ffaa54ae74aaf72cb1af642274b9f39be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4204096477a6119f4990d4066dcb46dc16b21c193454e85fa28b24d6f93340 +size 56715 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf407a235acfab14dcc17ad485051b2fa66e7f42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6811aa6d9eb64e99b9264f18495e67fc75fa1aaca28f3c339bbfd10b9c2448ad +size 57931 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79eeee232546c85845e3260f7241d6ed444f8a88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0b415646087b93a9849844cdd9c4db307e9c46d97fadc642fe7c60842445e1d +size 42568 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4698c50ceaf816d7416ebbcbcb0539d217fd319 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b1a9c25b8a7adab7389147ed3286baf70b2d2155604c52d5e782eeb8588298a +size 142442 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54b808a33ae3b39aff77dda98c7f9027e1fd2b24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b911932c4169fc343c9229fbbde4eb8047396e4c66d4d1b2bbea5c195382aab5 +size 70632 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8df032d56f86a9b2f15e2d523e103366654a081 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7595db9f8f272c913dddb7f7dcae70aaedc7b2a9996434f7ea5139e705ed410 +size 161213 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81391c9fe1a38d89b4cbb9a865b0b49a528dad58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e830f8c923d6e7da2d39fa5f43533aeda38e305ee0250bb8986d209ae7a9a91 +size 208487 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..515c63b045bf09fe21d6a1c114b9dfacecd34efc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b616cb86aacf1b381bf94c510050eadd1fa644e27d5116f99fa07bcdc8e4a56f +size 49339 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41b186ae4e7edb57c62ff6bdef35d129d66d4637 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:288bf6cda85d71570347e53e9ef3d0572cda27f6e0fab048253e5c697612f01b +size 31194 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6096bde705cde209ae48e8b4b4c2519d3dfd753e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5bab3cd8e26a8340f07b7976d0a8c24b456dc9876ac3a489b9e4ce85e86e08 +size 35727 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63689dead835c8630ee9019f7c91ec0fd85c4f1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26fd1001459b3538276c2ea925349fc214c42a189ecb819b1d040cfa2298ce9c +size 29192 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0b94df66b283a1d9bf2a51d213b7df15dd04a40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5a354b253b71d09a9be5baede45c2d4eeb4b270b4233b785996fec3e0c31eb3 +size 39948 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b6595f4f8e9d7588497c3c662b8cce0bd321f8c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d52e4624ff053050c12177b34e103d5a954872cca74cc017d44fc48ac56ebc9b +size 26871 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..991e87a2f9cc47acc590e11e480b5c0902c452b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30487b24fcb730ce6f9a68717dc8b73867f3b7127d68f04b6958508e2d6bd55d +size 21490 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33bbf6ec346f28b76217ad1bdf205b9dbe9726ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:546acfe5ddb1448510e817764d2d26dfeeaed62ad5854e4561467bab8bc69f72 +size 56868 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29791c7c7d82a720ae269f906b7f19bdfb3faf4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae8f602ebdd85cf3c5f30edfe20f8379c330840bfbe657175a8fde612fa4b0a +size 22002 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23fa4b83513d7521adf5832905fe6cd5adb740bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2ced4f2666a69db6bdd96d437f898f2b8b7901f74101d965a3a2d3f4da6e62 +size 167437 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af325fbbdba951203ce1fbb2e4be97a555ef9e7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c33c4470a785942f9d94f11cb28926e9bfda722708fee9ba5f67c15cc696b8 +size 89444 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8b1aef1592cf922d7e3da62bb496a20d47137db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e335172ecb4643628ee8dc01a3f480cae66b459530f4c2fa09139013796ed08a +size 152889 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59f43362b744d35a87051256f439655c088a124f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1091c8d44e678c6130fe381f003bb52e708aae8820198fc25f76533fb1fd8c +size 78750 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d919fc7832224e1be966d450a1b40b99e570a18a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380ea75816f09cc3ced114034e1adb7d12b88dcd0e6626ad718f7a5a0c41179e +size 73573 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..645cd0e3828a1ac3a2cb2a69278c0de310737bd5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11bd19989f0001ef939a45623396a75bd26dfa965795ea2279635d6ac000378c +size 81741 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d06309222a9bdfd3f93fb716ed06086ca30b7256 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1041d5844c8cdc5cec04546c626e94f8542f343bd748744c34c03cf4ae39cfc3 +size 89249 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1037bf8521ca183b6f2b3878a1129b9187bad418 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df265bcf0104bf99f0af8f74201935d72bd7837953185f98f89339326dc36824 +size 999590 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..103a899475f96ff56689aee4b2ca9dbd86fd3813 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2632fac7fc70de21bed3e1b360e6761963a9f89317cb118714ad806e0f1f25fa +size 132153 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4db49e1f279d92daa797f71877c75f05dfd4a1d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39bde128bdbaa243c67fcb9332ad5bd3001279c21bbb39edd7e5ac4790102b80 +size 181341 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..064b7f73330dbf6843c4d4a8a844738378329700 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae214c211226834222b8e5d6a7484fddbefd2a386341dd713e2f76fad70beeb8 +size 27252 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b1f5e263c36e512f78e1108d9e0faa4fcce6089 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c1a741cdcb2f146c5dd7dd4c11e5fe0332c0afc24c5a85fa059166b116bdb54 +size 119664 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8898e4bdfa29a5c20ee3adee50d5f8885d8d130 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4059d9a430dadf53d96dce14b87f8cd982016ea6af73e8759efe1113efbaa8b +size 57487 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01a52e4bbd9878bece0ba1bdc12a1b5fcb5a6438 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25c8419e7ad5630d4e1a5acec6ea02a1e94cab370cd719ea37b1ab1f730e2af +size 25162 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f351d2d3f4ba47594749200a2eeea04dd9f024a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506c93602947672656f842bc6a64336a9f83117e1b8725d554c58f60b45109de +size 39066 diff --git a/eval-results/mmlu/0/ckpt_039/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_039/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7c5889cd89edc24ee6612a13bb85df0828587e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d37c868c4e1d371623590aa64fb60e2f5109e49825683eac66f4f10dff253cf2 +size 32528 diff --git a/eval-results/mmlu/0/ckpt_039/results.json.tar.gz b/eval-results/mmlu/0/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc114dd94dbdd7ed74372d6aa77d067d8eb2cc04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74e117966885096b65e99bc5048281804f650dcb6d5470d5b0fdada037ecbe4 +size 7568 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bec871140dd7ce310c5314f5cee6a3abd9038a46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34e4ccf9ac0978e481b571e5b477158ebe7956fe0fd35ffd4f32fafa2f3d667c +size 16970 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5f1e79532455bee9d107193919584c6cc38b613 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b4ef4a2734ccbdc068c91720980e0bf099de8b4c7b316421a752500f2d3899 +size 29504 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..115dd7300767b3d616bbbf223d0db434be5dcff8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa152651fbffc1afa563e8aae07925befbd1a4a793bae2d23831421f71327b1 +size 39471 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b01a69476c7a7886fa0d6126f3b841180730c7ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d2ce482262d92842a27a0673f056e830993c43a4868341262c7a04cb8b3fb2 +size 26484 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dec9ed4c7d3a7bdf1b4d3733872ba429d7a30a7d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4720528cbb383e515bba3e1da9e4ef791be9019b5a1531b48e80800ee1f226d0 +size 60535 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00cec0967fc2cef7c079a2bac769c46209140003 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170674b1648d1a09e1da000cfe62fd11b926f38962ffa5cceb1acd40f172b779 +size 39957 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edb5a56750dafd76d943721fdda58a8e3d00c8dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4287c327c3395279453969cf8ff62ffec2a7095e100e08703ab00a543bfd7d2b +size 23584 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2eec20c1d743bba97ddf4b6e3c6f500b70b6a587 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c0471903ab77e3384f397bdc151a6407533c8e92078d0878276a088c82da3a7 +size 30893 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d5452f8f4080f8fb69ebe4a1ff6c27e4607f60f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb2f6b1f2457d92f5e285b0d7abdf05ba0fd24d64286f48b2b66b313b94b14f5 +size 22830 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f8bc18768641e209820aebbadc4c6fade059a7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7ce337fbc011f5a9a00b8a1f6afeb0b70633d36c98452e38779adc3b16e7363 +size 60391 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c1f5019eef21e846b0d706983d06413c1f19607 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fadd901ac23ed662813a10a7e395762fba89959eb6b4295002e8f4b98e8855a7 +size 25494 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4aa6b617a4cd65f8af7a0997bd05e3e94630f099 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c4e92f301f30cdd1e448dd10a26229e8d6454e93a54bbac27b82bc14bac33d +size 25536 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15b7f6f8f1a0384bbabce03f8f7128434caac2b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:689ef9321e60c0adcc4267bbc5ecd3dcef20eb74bdb92f62a941ef654189b5e0 +size 45937 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9063595b6135c907752ea68492721298f6d92a4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b526b4e7419a35c9c04e2e87ad9b4a6edbb940711764fb9878d2295eddba4ee1 +size 31262 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c545d9407dd1bb3fbbadc23b1aa13c63e8dab934 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1dd98d951621b7fd6e3bcbea628502173d145107fb3d15994fac6481b268e42 +size 28528 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8420bc7adbe6afa010ac8b28ded3e413d405fa1b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e42962f26595be9186d169adceefdd6d8b9862a85904a43eb5feefc87cf944bc +size 73903 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c54b60fc33291194aec2887a1e9b3bbd5d89b022 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adf9cb95ce4f85bd1927ee2046146798dbdb236e82fe4ea7f13099938038913 +size 29930 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b12303955d2ea4cba1174f43a674054c7c9da00e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228a3944e03c4574cbd5d62705c9a56bdaaba1329f76f5a5cdb9d894d690c541 +size 18919 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c17eea922de7bf2d4df3b0b200bf8ec0b7d70f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e5dba9a6cecae1e9ae38c73fbeb39922aa641e1940b7717c57cc4c4d1e62ee1 +size 87167 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..599be2d9991d37041c2633d1e6bcd7a29cfda1f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6663284a74526b8b693a8ca0993c6bcba992e2c7ce93579263fc884aed055714 +size 49547 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0676d4faa4e43e9ad1e82e1731d5f10abf8563c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20fa888eb0a99dc2ec8115e60d323c3d6a878120810ffbd299a38181b11ac8b6 +size 31392 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4652c15be6883907e85700273e4d1e645a5f0d56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd41a53b5213ae5361541e0d643d263451f0447117abff4e6d893612925f3b6e +size 144499 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8940d2e8dbeb78684b686fe45869988e94c2530 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4352e88065f6c8fe42694658f7f173d0aa39d6b8e619bd49e55e6c42055fa0fc +size 44097 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..102c399de9f08c67cf0facb1f19c281d1d43d3b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ff0b1224532f4c7683a764fcaff516eceef824c60e0e5f9c06d3fc26ecc1dc +size 53886 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62faa43edd50dde0c65844d658ec702cd2794ee1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea5e332d748ed6055f015ea3cbd73ec870cf2eef5266d3857bdf367de7abe7f8 +size 91805 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d7a79eb8313eb2cd749758de3d1b623f0b712e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eae0467789e2510899b1c18e44a5513908cdb5013f23248654845171d67fe04 +size 56771 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77d531c755f597981a83da745ceb8934849fa2db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a59861484fa397452ef02c7898335fc44f26082ab942cd809b92c6719bb4803 +size 57777 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a033c45e77cf8b6f3b29261b6a821b029a3d02c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:368fb71bd567a02b2d43c803209b578ed47de3c56967807e0d087b8887f95549 +size 42603 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3157365989d4f8e6de3c8af201a5ee30a03ababe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a7f5d993848889a7271c7ea9981bf36c3663a8d0784e26e7e08d18e8c9d311 +size 142425 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d3657aafd0645b5a278b1e2aae62d5214a339d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f62828f5977b9062f68540cf807cab9842309cef36c7c5509bfc265cf0a4ae3 +size 70636 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5826b6c47f07731985568d2d5e97e09e853e16ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e832184160f22e43b05640da222f3f6b90cc94b0b0991b3abf1e198a21e4fa2a +size 161195 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d282165240a95681e1fd1e32478ea12f8bbc7021 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb438e70c81770115c5f1401e23d4b065186769602ddbf379ec037c0fd8b2ceb +size 208337 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07edc6944b098766981423c56d3a9566fde1225d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8982e1f20fa19bc6cfb947c9436f84412757c824b162506d137181815b2d1d18 +size 49333 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27ad62e894ebd9b09be4985c261e224a461c3984 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54a299bc0aef610395f6591beab76fc6844644fcf0abe2951aaca0a9e3e918ec +size 31214 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..102acf621c064844dedc49fc4d28435f70647de0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc065a5a28112ffa0fc1a2370e8ede90cfa2f8c879bafe2ee13e4c10a670bd5 +size 35704 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ace37bb93383952affa753ffe5653057814aaa3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104ceeb64575239fbe7a9ba5fd8f32d6bf47e051f7926092c7a6ce2301721c6e +size 29182 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce26d5af0db077eb0bf4e778b522c2fe56528522 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f7e41835cb95f4240f714707c24b660314586b0f25756955bcd6de12f0e1f26 +size 39968 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6afe4b3b30590926e6e26f043bb8b05eb9cd173d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46edb1471dbc2b7de2a8ef09d597d3c1797259b720416f18a60b6e6463fc5949 +size 26815 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37186b53ece48220b52d3d45930009c4ce155c76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c89887125e4a8a04798edf09057db148ab269f9f4fe5e2c573209851419d20 +size 21414 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..363335ce77fd39a69161dc7b5d806a06319c1985 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385f113daef3744e6c5b7ee21bf06f510059dc759b8a2d0d5a4562f8da09a42f +size 56875 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..129ac0244b6fbd441c0f92ffdf207c44d14a9fb0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:747292b9adb99d10d08f54d17729ef307143ffe1994658e8cd43738c0a55ee02 +size 22012 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05cb8817ccdecb2d1afb85874cb7baff0425de10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf976470ae7ea2034202da4ee1e504f2a3726521a3680363a359e4235286ace8 +size 167503 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..547a6e2075692d2821f11d9bcb4ee5fa7b971df4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7fa6140474cb573c805e412944aebdc2a804e18f77eda231d8bbfc7ad6f57d +size 89395 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b21eec90a7365ddb9a727fae7f5abfe28031a72 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b330483bc3de6909f512fb1626b9726fd6bfe79618aed91f46c768bf3040d8a9 +size 153392 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acfc073aef5255faeae9b996044ded41da696e44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0eb556160bc17b0aa6f280bd217a271112227d8d5acfeb9d856c59fc4be7396 +size 78737 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fdf73e0fdadc266e6d85ffb2c4f81733ff70b8d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1246cddcff209313aa26bd2c438643f44f2b1731dfb959a03941654c649bd626 +size 73620 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e0250f4babf2d4c09c7284cd0e86b695e32aeb0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40273d727912beee406ec4e5b48711689707e2b4abdc58718001bde90079b5a2 +size 81816 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..054340b31997fe070d5d0c955a0b9c51234dbb3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5435ce60f9be8347fb87d0f6ba3abbc505adfa5a11c32d82a091ed592610b456 +size 89242 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..614d7ca8a6a9c8fec5b85b72fbbf6e473c1e457b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa7eded3e3dbc9dce618dc5347f3b04aeb2fc5a025d411af1dac7decf10aa60 +size 999509 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..677de2c54635db853b55981383ca67c9029045ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d50866ebe596a1476a6355677a5690b41e85968e78873afe5a11c23645d5c2 +size 132067 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dd459ec42e4d5f54090f85105af2e9049da753a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87d05b666ba604182ec548a005d9c4f35189170fca14280c2249842e93dc932 +size 181349 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0c6ea3ecfc2f61621088dd72ad21a3996df3cb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ebce68c8f418963f3eaeec0a03978530b7435964a959f862fcd33f3ad6d35c4 +size 27232 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..823dfd015a67ac9fc937aef2df7863247364fe7d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a1082a5f53f3e56e8931d3795aea453637c3ca7ad8483799cf862316a97f81 +size 119585 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e32761f1ed9a3af7e7f54e6a19aa2d509c6f884b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae7f7f6d2e5ee68bd9278b0e2f357fb91f8ec2be93fc0f74d3f915522724c11 +size 57448 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3793a0e3b9fff2ff1af260f8e554e79359c303c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd33dca6524a9452aaa732f578f769fbd31c149e18e55f8bb2c9d9e0352dda13 +size 25168 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75d3aa45d553a50ed106e948ba82fc468dafc24a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e4a581f5c09d2e56fb5401d5dc18d80e4fc1db9f6052da7070bd1d62984a6c3 +size 39086 diff --git a/eval-results/mmlu/0/ckpt_042/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_042/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9fee175defde64c7cf6082f134bd5c43ef279a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e7361dd771dc9760cadb07f0397fc5bc483c07479e10211a3dca781f746b7b +size 32590 diff --git a/eval-results/mmlu/0/ckpt_042/results.json.tar.gz b/eval-results/mmlu/0/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb33e8641bcaa9ffd79ac338de95fb145b9f5d02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f890e4269a9fc96f08dc2d26896da0071534af959d3174314c8dc2af30a3cbd +size 7604 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a5d367a88a7de4c59ca44bf619afa8586bd6cdd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1e6273f06dfe9d28336f0b358be53a8a476cb1c7934605e0e89d0f7b9adcb0 +size 17035 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..038046370e3a736dd5064340f1152eee6e0a1485 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05dc13610383ea45d9d092d2b0fd66b0ea93592b73daa116d358c7150427a5fa +size 29547 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12148248e6e26be646d28d24139919aaa2eb288b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a59b02729cd1b4cdebebb4e7eafa04a90bcc72635664fb30c725e8bb63bff82 +size 39526 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c414de12041629cc06e709cbf8bbd20accc8178 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c00e4401716a457858f6b86e56446559dc0a616a70a338d494a18b24e7d6093c +size 26517 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43de554e5dc08679a9ff417a1419eba81f608692 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda2638e573b29950e5191db20c5d5b810dd8d20fe28adc9d79393730b0f76e3 +size 60702 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..533f8ea0e4ef1551ea979dead9b6141bd0b7f832 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2892bd8e301c2f2246bc211aabed3a1303abafb9849e96ea4031ae43a3c90fa2 +size 40058 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9d9b628aa35887665cfdda941e99cfc1eafb817 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b0c19e51ea7137a79089c2ab7ac3ddcd80d1d6a593ec562e3b941aa9d425610 +size 23634 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ea8525f8ce68f921b95df57d09147f2087f51cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5b2532418d730eb4c4b9631c6721d6d4e7ff18a65219ce8c2eee19020caac9 +size 30946 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f798a002e1c96f4f82b631f3e37c8ab32438a935 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a362014be8e5e0e947d935e059494aa55f1a2d09ae517ff432b2c2d7f446107 +size 22826 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5954add4be53d60b4a2ebdde210b09dd3b501a59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d48dbad19db6df0071da358cd642be9c01e8c8540c07e330359630e0e5284e +size 60471 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75645e8749609b25187fc9c8a144e5979ed771be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ec13f2381f9f0ed94d321371e46e57f6423b33bd1fdd5ffad84195ffd19df1 +size 25566 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..882ee467112ae30070af794a52a0730aff148019 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c55fb99be5ebefceb1509391f214f61b25f2942d948f6afe3aae67e95c5f6c7 +size 25549 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..178d63cab2cb2a61698b6b57851916c611183d07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9caccd148d82ae42289aabe048179f551a8d9d1141d88993e968500c6ca4a95a +size 46059 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b24790998329eada645ad6cbd5581d2e55dd50f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221ffd516036cfe3bff010da4fc908bb4002f8f57968e867a82302d339054bf1 +size 31316 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95f51c6fd9dbf2e6f869e953b1cc8f8200ec7b4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788b11afaf982642df6dae1d52a1631e3852fd54fcec78b78665d144a346772f +size 28545 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6776db0e3a7bf61602103fdbd0341928dc4c2e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfba51a918871b77fd23c242131be6441c6d13715b620e1a62b9003c01821d83 +size 74044 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7771392c05f6a64a961b85d2e69d39148dddddd1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b283ca65dac15d7f4144b5d55a1d0c4d6c2717c2dbde2448366ec5bacb0281e +size 29951 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7cfb20694178441135f310a10099e8dc41dd10a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc13bc3d15c206d40c89b125ac0026a8fa070262692ec079c1d47942a9adcbda +size 18904 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25a1ac5b7f6eed67c5c4ed7967eab856db506a49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9565b85cfeed3aeaa570881920f7682f2db49390c9bb2fc610b2d77d5f2e6b9 +size 87256 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57939c67874c88c3f2994ff5caf2b5ed27914102 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a98c36756bb4ee0f9a808c2861a58b2cfb8f77c210c0ac196c188cb4923d50 +size 49632 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2e93d51cf975a4fab56dbe59f534548841d4a3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1baa84d3f57a2efb1899b1981d5a1a85ad5da77404c3c539b2ac0cd0e0242813 +size 31386 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6077a7f60fd82d331118ad91a2cd1ab806d43fa3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7d60746550d5407bcbc999d2b67e5b21b84922b4aedaf1b68b7c81a6f55831 +size 144688 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d5056f9ecf187148439045c40fb68a0f661b0dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:badd3075115729d716711d0114b797f5857a68db3d83b623b7c29dbb34fe228f +size 44162 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25092c0db992db22c58e5832f33b393c400848b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f7e2e90b55341fa4b55359d3a754054fcb75797b4c9c3cf875c8ec0e5a3fec7 +size 53853 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b60bbdf601baec30dde646112530ef4cb9362671 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a2f05d24b7c72bb1b6b1f1168698844e448e9fe100fb3b712316df44c61bb2 +size 91889 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46abef6b37b66d2fc368812de74e3a54db0ccb62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18535ee956a51db2539d3b345e5d5b8017022f9a940bc8f83890c34e1020aa6c +size 56828 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..158286699b53b314e897efd735ccbd2b1fb2bf4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c18c88d40869ad439531e4214b2cef89c0746f0cf3606572b13b7bb4c6b3d99a +size 57857 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2c0f283d90f1f992defd9f88ec320802eda8c1b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91a3d383e9445fc789f4cdf5860f2223b0b98278d7206411f9f45ce69f59f04 +size 42674 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1994e26c0e03bbeac457b1e9d8e926b5998d7860 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f19c9d75d0ff1c0642cd6a9b1f5bef60685bd732b8d81ab7ce0cac2eefccf2 +size 142706 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16653d55f4d6b3140b783e1ffb8c56462d00bd7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5435e04370b2d28fea5654332485b6eaa570fd5e954b9f23f02abd213f58c50e +size 70721 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfb1784b9d5e8e0fdddc87cca6d63a0938a264b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e05dfee081558e5dfd91696a36ff66bb9f95042992c691b989fcfc461435b0 +size 161095 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d6f1009baa74b82b34d61cebfddc8045ab37a93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8679e7f7e3018d8a81577401368cab1d078c62b786fdbc0889d257fc2594a843 +size 208474 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0df61c0d8db3e651aac4b3f8fceeee72a34c3c0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4764912692e8a73d7660adf674174fbbb3bb4dcebe9463de0e75831940759b8c +size 49440 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f00233c184e386ed90e2b16946fc25f3853f24d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6d71cba7f0ec6c475d1063fa3695e767a6f66477812f805e221c13b6d5f780c +size 31219 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c35d9ce1a1205d12a720588823fad661582b487 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb1d8de9d0036ab71f90680b9a518c8ab5a3ad2b156d35e0790505f79530d71 +size 35755 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4ca4327abf89263604764f65829430a2d31b869 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17aa6319eb24051cbfa393f5de7431eb423b774bcde89a8efba967574a85e7d2 +size 29229 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef5fd8ae8a1c072440d32d4222bc478903eb3e33 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a86b72b68f5fdb164b60ae7430c2fc1f71f411409b66e6e0d82d2d44b21245a3 +size 39932 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c27a54c08f552e9bee57b4e0b615256d319d8a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ad9c5e73e5c599d8bcd2ef8d401006fa9d3f8b1c52d7065977a0523aa97ebc +size 26845 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4b03466422391f49752caca2788a0b6f29a1d85 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c056e29cc624e70a92d26fb2f27e7ec76b94e76043e93cd51281cc2609b43f7c +size 21492 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea13ac7a7ea9a3b5c87861f684a2b4f2914e5de3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b04cf2816f0d317b70285a67f8a523a2b386ff727b6181bc77891a9cc06a8fd +size 56907 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df11d0760bf86d1c8f114b3f8c16a652e9e12cab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ba6d6c9e1c1c079056c9cca1d7be83476d84fcc549937573c21c98f0addd946 +size 22045 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1497b108b74448860b3e0c351757742e3abcbc73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbfb5bfd91af75e3ea5b3581f4d9590fa94e4d7edd36e7185e336043f60c52ea +size 167757 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d4eb4aa2b10f7fd812039308ae06df6455d33e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72d4f5229da1d09b740d5c504847f394a8b694ad19cc41a51dd9425b6e63521 +size 89545 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24fd0e4ce3bbe7216e12319d4f4bc53a0f799ba4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1827bb5f6b3bee7ce9eb46d2f5a13a577142bbf40e7bfc32e9584d76e59cc918 +size 152535 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3338288ca9c900c64200ea0b91ac5c89c31b3e85 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256cc81ce57be1118331c906f9d0fe074844222cae6437bdd18adf41be18f5ed +size 78803 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26a96446359d52916f82156032266a65842e7248 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fa817970a33fa1a6cc450a47bf3cb54aed419fd6e17e3a07d7021c633cbcbf7 +size 73665 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c9cdea009bb93b83f5ce5b3e8a816168005d573 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541e5a3efecf9c8d590402c03757c24b000f4ab9516e6ca03d52247aa4e4c018 +size 81872 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..972076e26d5eb2d1258d24d762d611e5967af733 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6650a493640a98cafb56a5f6592c6344d54cccef0bfce262edf3b943e702a06 +size 89484 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57457f777ffd17fd2a6640930d4a3b2366907eab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9d6303a33abc5e4aac5e4925fd90bef85e976ea6455f39bde55f165774c760 +size 1001084 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec83e40a04a53b98996cef21efd3251df0d3b171 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb0beee111148e557de163eadc21a05342925ca45e8c7c15a332d4d81a0a3699 +size 132295 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa3020c0883fd1f19ef85681ce1fbd508029ab17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c394120c7903910581e6ab50a0d42b620edd008842778f4e7f61ec441e7703 +size 181523 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d08b9b8fbd0528e3afe1addf9a5a1b1e851c679 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c0bef3e98e13d41d840b6868407eb4c8a5c2f2bedf36bf22f675c6b2ca2c372 +size 27317 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d224f8f71a5f18f79759d9bda3aa74e95e5fc5ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896ca4cfa4a7aaf55d256df2bfe7768c9acf65a272f651c7a5160ea03e8ac9bd +size 119627 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18ea93890826b978a5b2a970a097a1c9d1259e38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c12861534a67acd7c197c60a6630588d305735b46df25fc27170e80b99c34b7 +size 57476 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29c56edce7c79358e927c2d41d1969ca2e3bb9b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8892fa45763f90c29e39227803b035b0298907261a3446c8c5cbf232cf672b9 +size 25147 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b35efca0032c766d1fe31f7f38df5b185e3f90d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db906654e2f17ee4361790538b74e4498e3f12a9ae409372f952d58a0fcb2ace +size 39128 diff --git a/eval-results/mmlu/0/ckpt_045/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_045/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba1845892c4543ca01c9ea48206f3b912638b3fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63b5d7693cf1112ef1db88a93c577bf81da101eb2d22ed4e9e09a7a999b00f28 +size 32641 diff --git a/eval-results/mmlu/0/ckpt_045/results.json.tar.gz b/eval-results/mmlu/0/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1f5da2cd5e0323ab540deb82c5d4d07e49b34d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a092f82dcbf000eecad7cd05ed78dd96adabbc5ae65aa02ffede5abab952c73 +size 7602 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31b281c311935ffa62a780a51ffb13b2a874aa9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f664107e2b8f8c0c64ef2c9ee1e460f446ce79946261ef077cc51060328c3bb +size 16989 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f567811577947236002c6bec7419daffde800550 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd0a7df7a06f30a5a089518ad7abb776144925f7072780ce2add8fb8aa21b1d +size 29523 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c626f58afe606bf1c7f33bf5532499224bb9e6f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e947085ad4f604d1fe80bc2f8e518a5dae847694b88644cec1daba0078da7cb +size 39537 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f10c5f47f4e9cb0a42c41c770d9ae737ae3492d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bda1490624800bd701fb1bcbc62f0798a38d3654deec61e994d8853dcc89bfc +size 26537 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09e1d19bae9fe2472b811ac9bd20a71113a54b39 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44f0f0facffee409420aa1cf38550c77e5802fb574755d1c5447a0397d732f3a +size 60612 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e60a9633e805abf60ba8b2fea09d719189c5ddbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16dc1401e6e5072ff2670d4a0db0d01f28be6c5929297c8627158f65f9a19f4b +size 40012 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..821bffa113a7a645badbd59140a2452b0155a2e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60041bcef3717078b1e23939dc958943b65f574c40c3cf362c82a946af10952e +size 23615 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15ac40038d173075b43a352e75012da1ed3c3678 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d78c27651ef7055c05c907a70fcccdeba30274136cbeadfe5d714af802cdb32e +size 30950 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a012c8db4a7b4d50a8f1f03f7c0280ea7779b5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59dfbcfbf1d31887b1181fa26afa07c8d3b47bdb4b85b64e095a598ecb91e8fa +size 22867 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8471aa623cc494ded7e3b5f8c1907e65c0f845e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3a35f1cf8f0ecb8cfb163372ad67f25a545cd019df2c15df97e1746a1e6c8c +size 60458 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ce1ce56afb38298319992d56e46e3ff8b967774 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:687c1f926216e1f7855944eaef1fe3164efbbfa63dc21524ae3203868df06cad +size 25541 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1778e2db37d1e1162a3983cf897d6a14f4b836e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba2d12e175fde4a5f8674d87fb977cf22bdbd623326f5c882d8163db4c35d3c +size 25540 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29284ef8f5474f0c4ebe555628733c4e89f759eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd53c119b1e59133f6b1a47f461ce52fdde8ae8de4ed981529c19f8a6f1e8ae +size 46077 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8af1d9302808b74f33a993b8a602943fae4c1e93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f604cd4a48dc3815591db18189a992b340bb7cbd9cf3bd63b736f756ea8d6315 +size 31282 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5ba60927a4bda9e7e49a37872a25ec4f7fa1df6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4680a6522f92b9a633cfe9e9581df7f117cd2ac9d85de718ae7bcc689d947169 +size 28554 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f734a4dbd11388a8d22323a32b6d4eaa0ca0e1e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c410fdc32215e43df8e40432d574075c9033bd20cd50e171d04d0d1fe261fe03 +size 73916 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2826c4e96b0d83f810b48eb8d8de58c03607a3b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa36786c06824a8a032067d994f3eb1e5180b3597c06851a7cbea58228e399ca +size 29906 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..947263c7f8e1f3bb5391e9098e96f385218ceb10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2816303d0aa81dae5f7bab969b762a520ffadca6bb52c9e03c86b56cf8ca68d0 +size 18934 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8691c6e782919154a982f186c8ea10e659afaec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:397f1e89049685faae19e843291955696e636de3a84dc7e82b42f6eda67bcb68 +size 87292 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4e4860af524ea2486f7537d9eea7ee11df396bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f753be2bd89cdd6387a50b97c01a689f7499eaf5c99c8a087847a78085cf55a +size 49601 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db53013eb66e90fc910ab0d9adb9df0ce78217f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e3f6108166210f74778d7104ac1725946790b33c7ca01f66a396b50fdabeeb +size 31364 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67d8ed54e4e38c03ef1989b3348d29d5ea117fd1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503b51951524d452128b22dd9ae4cc5a2213dbcc0a39b6b589df756338d0fe30 +size 144686 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fa4481f146cd1c9c7a81107138cac634ff553a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c96c97f5edbe193a5e530bf46c98d82877153410b43bed6a12ca2f5fca6aeb +size 44193 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0dbdc56b9c7bc90ffcbda8728d8c3a22c150d867 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a49b83e6e84a23250a200edd75ef388446721744409a323b3d2c5f1b9f279c57 +size 53940 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd0ec16518cc7cc51a30bd03e7f0813d4c64fe5e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bea81bb0cdc27616dbcd9e6b1558ec3abb53717d602259be051ea02ce7e649e +size 91908 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f51e07e2fb9270dd5b63568ff24c69a6e2d96036 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab7673ce7c1a6e5c8bd12281e7a06f454993a79a71582423ff8c409f49e86987 +size 56832 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab710e709b95c8fa305e5d95701d53dcda485428 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965c1049946d4f1a722582c297d9fdaaa2e57c237d33b67e3645febac66c8a37 +size 57876 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5269789f1a83ad271198ad70385dbce18e8f4cd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea98ca0222e99d9e0094ed07231e3f0d2a3429ea4ffa7d2ff79d242f9a20da9 +size 42614 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91df94b381c245bca749fcfc54296f86468004fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef04fe3c99fd57b5a4e82be7d975f5c3115e1d8a9d467acea91c540ed29e6a40 +size 142667 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b629c4712f555dba5fe20ab2400c4e7e0be8b9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1c3d45c4029301d96ed22b982b5d08d5650e201aea180f385fbb3bb2e71061 +size 70768 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3d38d6b21cb7e9b59b214626fb837214d885043 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63f3ac7a2ce08a098c1896496e49e66d19ab4fd0ce7a6b59bd20ebe9ff575cae +size 161314 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76df6e4b62eeb91e6e61f16c718b8ea530a14b3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2317d0795eacad361c2683cce3a13be30466f284cd05e594189d99bdc1a59a8 +size 208436 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..663755276526d6b79b238fc105da52007a937f8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6108bdc5c648ffc6ba69a77515bb7b1fccdd59ff7c8f56984cddf765e221280d +size 49468 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da7d1d6658b84082d67dcfb3be2fd52ae48cad6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade061fe163a46855a95dae86a27c73c7d7d30f9ffeca12a8cad80dfd5abfe6d +size 31230 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..291da275f19cae23cec66e48c663e1b13c35c836 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf47a2ef6c0e507d9fe5b1ac9814bf9554912e5e621d374072c0cebb6e1a5c27 +size 35746 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5acd9471649e2bf3188ff078aea304dc82556e95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71be23b4af4d2342db805445112ad3329bffbc0ed831c7245c632e283abe466 +size 29205 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07ee2095b0dfcb66ec737d3e6b367f14e9d48e2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f4f359a115955b427c528e0b26525c33a4b583cb7a55f1964e848bd78225f35 +size 39906 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfa8b1a2c0f698c73e7b5890bad08c2abda94ca7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1c710934a67e6d3a5172a86499fb48cca52ac1bbeef95c7beb5eae752c84529 +size 26848 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..104e0f1eee9b0a4fbad6f06623a065d8130f2866 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424e47985771849052db6a3d888e34714269f4c6ae1f4ffa3813c356725b375a +size 21485 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5418e061a909ea91db1e055d79cd8be381f59006 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b961e0e954959de4cbf5c86af25bbbf507bff5f2c700388a1343718320870a71 +size 56922 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4a9e87ef80926f26634522eadffa0098c2b71cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181af8169d5eb8ad730994b83ccdf023b3633b7bc2e7dd90017305823ab2bc75 +size 22022 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f58391a694e9227858794fbcf3fd7377d9447be4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a867f6819127244f7789171043d56f539412af9ba6fa406f3a838295c16f2a9 +size 167688 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1f005674084b9b41b7d1ee536012f692e967a45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c05777496b0928fd672e06e73900fd2cfa4d0b6538661f9907d50e55f7775ac4 +size 89518 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37a4ac7db4216d51180c52ee148c0697cf1992c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe35457cc4a64678e760de5d0c79364923829d33c4728da7ae21d9bcf3b9743 +size 153957 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bd97c1fb1c28d60f0e1b3bbba523723593ca0e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b4f369df75d48809e4356c5e32cd611a80a9d74306c88adc704c8db25c0c612 +size 78841 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e780ee903523b5e6676ef07926bf3cad619bfe6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af7a9c7cd4c855e6cb184f5b7e4fe3ec82187d7bb7e9df0331d6b95e98cc3c9 +size 73697 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d050b9c70cb7af0a21721f9e4c6e7402f6112f39 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6bfc9e118411dd55d245a27642e228f277ababbb0bf2598f13ba6613990208c +size 81870 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8b335558e8942b7a0a7a9100728dc4fe2700e86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07bad4c77e8c7802c4267a1c554caa1afd1a98f12a985673770f215fae61c819 +size 89242 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3274bdb9ad559c9b29ce56376e216f8c03cffa02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839a70d3c6dab345ba9681cc10fcc7f5012b063b0fe8b8a981ae6db1a53344ca +size 999941 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9ad76da4d2c05b11b65345c8967558e317f5c5b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecab4dc3eb53f4016b034bde2ebc6a845089b72276636d20a8082b4c208c6129 +size 132191 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23718f739ef03ab0d3fec9be76f5f0006927e12f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66ff214d322301442f4799e2da78576944db7a3495ecdd497405fe4465a60396 +size 181499 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..352b6e527780b388c859e9acc4ad5fba74387376 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b702dd140fa6cda938f3008be97b47be61b4bbb948a4e74b7528d8c5540d8a0 +size 27274 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85f8275affcb91d3ef10c3bddd05c335430d7af8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d4cc92cba40b006b43df34ea7acfb4f91bb2721b66d965f21509584074bdc5 +size 119731 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..072cbdd609ea66ebdab18fb6ac648d7914d94266 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61b0361ebdac24de867b8ba0a57b36bcbb9ebf79e743ce96482ea23e599d364 +size 57539 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b65216fc4e5c8aa32c637627752355e017fca9b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cf3a6dabc969c7f1fc6a136557cff18cbdeee32e83e08b22154a1f186dbac19 +size 25239 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51f22a353c4ef0fd5552f76121601e77ed6ec581 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c94d140510daa2c6fbdd475014f968293689889719e866f642495f25500a39f4 +size 39100 diff --git a/eval-results/mmlu/0/ckpt_048/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_048/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d4806e62babf2e7f6fa78b43c2797a36ea139d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a65f44498cdae4416c9cad510cb1949270237918bf92668d6fbd91a686f5ad9 +size 32669 diff --git a/eval-results/mmlu/0/ckpt_048/results.json.tar.gz b/eval-results/mmlu/0/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbdca44e89c48c8927d3c61d3bc4a6186b5c44c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e5cbf9800a30e2df67dee0f125b2e385d81f0b1f5c98f2529419b00344f343 +size 7620 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e37ddd90704a43c2781b1cf6d6e2927013ec8498 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d9173702e8cd8695446bf663d30598d58a8bdc9f4b1988b3fbc9026ba384e9 +size 17013 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d1776d5c8d892f7b2ad0caf60037a908aa422b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a922e6b51232d76fa4bb48eb95858c4cd9b3a449c56ff13a922995a8beb17c +size 29586 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6188f6b28b903cc93fe521b73db5f2adc04d0984 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6328f65e475e71b828bb94afc76f617e56f6c16b70ab1233185a79b8351a938e +size 39586 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06aadba7ebb4c3721f6f7aa914774458a3d405cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe3b25fde7c40c5f052082ccb3a335ddb8e1b25cf60cb0189cd7f318d55081c +size 26572 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10fa1014af43499ad29dbcaf2d1adc014d4138a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eabdafff01da28df1d68934cfd0b0b99ad8394bde4d85e5786d7db1f02d810b +size 60748 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84ebe5733f1885483b89552a6786927e7fe8bb8d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec1f36a547a9a3667a36c358fb8444b63c84bd6742726392b775c5e98e7b1ef0 +size 40104 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d45485d8fe79218fb88ff547d82a87e49f4042a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a081968f12dbf4947077cccd793f6fb6d022479ae16a118ea3d1703f571a358 +size 23660 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f458472f1df96872069ac7f1357869adf1db8d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93ecea99d3579ad2e2f60593313d619e54e322c674694c82b803b178e3b5e9a0 +size 31001 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..620127e67fda73d0b3466170511a230e8bfa2ce9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ac1d4c2ca1da0a2a0f0a222d35ce689415f67d5aec5b91131f45fba6788523 +size 22888 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..806963d7a72e5f9b1a5cdbe97fdc03e546ae7d6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e13031c9d91a17e93f07d1b21dcd450e5afb245cef6251433274453b1d71f9a +size 60537 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f89e8e1b02b08b7d8d3a50a6aa178fe45ecabd06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d91aa8acc2d35010f987ec130c494fd28c7a7c43989dbf98d9f7e46423c786 +size 25584 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32c86c15f536436f714684a7163a657b2204eb29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0235e210587c65adf3f65b32fb6bf4a79c62319567223161dcc519e6904c81 +size 25574 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad8a8fe5e36d622284f6dbbea7c009687af8af81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f42a87699f3c862e5a843ab9252c6eb6595bc9f10653d97ea2e2dc0ace91b1f0 +size 46200 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff6d022767eaa9d299dc30b8c654f66dc9dc17ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc67b45d8bb59ea70d948c34d5829414929488ec0600c6a63619d6b8c1ab8ee4 +size 31347 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b497584248d7ad9d4366cd61cdc39252fd6624b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1677ddd22b22a1f0943749956cf782ee9aeb083e1c3edbf09dac8097167ef4b9 +size 28582 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f32c5077101dc9acafb3907c842ae77d9640cedd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf456522863da4b62bddc8d1089b866ac642b87da66f31fd6520df34cdd8d554 +size 74087 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a11148227197916159678e10ed2239d71747aeb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4af781ea48d9307b9d735e46ad4571f36f6b406eb1cbf568746a151d6cf7eb9 +size 29949 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb3851e2f9c06c6c1781a6b145f1bf6a8694857c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d241b3e038f7ff816d8effc6b73db74dcc7fe2a066c78540d965f8f2b599c3f5 +size 18972 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49e2b994c31bf1a65a2be651105fb73abe41dbb2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5b6b04af88e3550f8349a866cc9543832d47aeedff6a4d56ed9a319fd3a6ff +size 87414 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..501214b53c9b3d5e3315f7cb98e64fdd0354adf8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7daf882e11c0db0cca229ea4d3f3948c154a605e449e5d68270df4ce3144be41 +size 49683 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a228a06012401060f90ea695b6c2e521c08798d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79db6bbcbc9e9fa7d3e83c6041487319ca1e4155a9fab9b519afe950b86c496c +size 31424 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8726a610730c7333d67899c78a08a6b9ab00f30f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a990b73d85404b6fd30420776a78130def512570f8f3174a42a7d30ba0a7c7 +size 144850 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a23ff660e48ae0d11b97a7b8a2c43054e009408 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7786b97f65d7342836f45b3a64640da8d9babc854025dbfb958cd391fffa76 +size 44183 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea381f9660d85b3e7cbe38b9ec1e61bf6a4cfa98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746333b13b050e2715a5a5d95b05f998091df72d5a2990b9e9dd7d58d11368a4 +size 54029 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7ebed3b2b4ce9996674c6cc706683bc84016615 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cafc6f56054e5e9a3e5b69c1425e41f49458f0c8e1e8709e3811cf6d1a563bae +size 92139 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f549ac3eb0d8ff0904116030a40429652784beb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f4fdf0867231a48187855f47b84ae36c36c3bbc7881fa5745250abf8b5059b +size 56820 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8739cd61e8f8b453a7677366c7ceeb9c6c4e4f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa6f6ecf24a77cb4eb3020932741077e43e36a40e69f46ced92a9acf8628dcb +size 57957 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a8dc162494c90064211f46060652c631ce40e27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a00e8f525d289127ff42f262507d10338c6d2cd031bdc52850184164071ea96 +size 42682 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ef2175a6afbf807fc66b1f7cc16387835b27a17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d12a88e67f3eb76a06dbd04bfb50bdb734c8ee09edb6be3a964a4ae5fe8dc5 +size 142928 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3b1fb45407d5d88125ea2306ad17c0e3ce9f6a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a5ca3f6b5eabfd98bbf63f6e217463653b64ec959d359cd68b3c41c5dfb50a +size 70805 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3d9243ac92fda11d54be22d9a4c4513d82fdb49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea5e33600eb79c30e9efda3f327b779aea8de1a5154efc5b19f54ed9c98f883 +size 161404 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..156b89221d824c5b2055a0b6077acea56d51b514 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b48a123c32ff83914de016f7152ea0db37c72b910a68f5ff64f8ae76de38b87f +size 208611 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..296afb16e8f4557b0d1dfdfe0fe1af985e0cc08e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85fbfbe28472b0b119559616449a36862cd638a17c409f2bdb65ccdcaba1559 +size 49508 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7cfe749c66ff74db171ddf38c6b85423bb7b7bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2990e63a10011cc0ce029a72761796e6e62707fe432ad8e25b26928e29a96edb +size 31276 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0013357127aea6940b512f3536b286933a5847e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65469fb6aa4642d5af942469e79407419f92dd077b3c292c2d02eb15d14a1fd8 +size 35812 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be79147b87cb7cf523a3a90e13096461bff8cdaa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a1a218709147df8e6d1925908c017ff4df01162cd10020179b099c588e71e0 +size 29254 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..494232621e87f5ff9ab3210030a8b7c5e328dcc1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c309117ae28380814e89c27cc2f139d57fbb54ffe3e4cdeb604a05c12501370 +size 40024 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e290520905090778cc209fbb948493acfb4c6e4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c1a24f6475b624680b92cdc299578f8e8590b6cddb1ec69333bbe75236ce96d +size 26871 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b904494dd0e844d770c8ef1a34a57d5d3d220c19 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2633127e2e6b7c7e030588016e18d2eb9de39fb70572abeb72145b947381ad0c +size 21526 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af42a1d64acefada918783ad252afdde9b8c573a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e559d2ee37b5b915208408bf5ba4935821aa6a64a05fcc1e0d48b7cf9d1ee7 +size 56997 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa3b4f7b5ae2df50ae1dc68b6e9679431c181f61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f13fb553e1e0ef34b0a3e3507efd6eba2584d0e4a947031a2b0fae07f3854711 +size 22058 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f1fbcf24984b0b450d135e3a6c5e0310b882d98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13827832e2382c95d4ab99bf6e7fe10f7387c27c9f59f1e95dcff6ace43007fa +size 167970 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2564cc35c1f8ca18d7ffe390e4b475ee0a645473 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c84a545dbf52e622b43edf6b1e7af019243b3248d8e06cc68b9741516e34248 +size 89592 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..082352bb00c857cf54fa015b00ee897786927cb9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41ee117d6be5e4fd68b3d62be76fab0449a84950ce93554d661b1d8735222124 +size 153428 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b94f8ff91dca8bf671783a2e4369bf0fcbab00f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7eb35785ea471ce1843c4bbaeaa357a5a46498a8a458ec31b5118081c4d34d6 +size 78880 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a4275505dc8e5c790da1e46b59b03dc665cd976 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:688b5278dca15bb4dcfffbd8e3a7801fcbb462de6d8c92ceadead492d2580221 +size 73771 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b06886e70b79c3918c936c1c0483f4f2a97439e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe24e98eb89d012695363a852de91477a693dbda51d2c475867618832cbe766a +size 82033 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df99aa42a9718a71a082632d872025d0bb126932 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2597a5e85285a8adac5ed14d726afaf7feb778ddc1d722bd2b2b3bf65c86994 +size 89397 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f8998afe138afb7dc647323c94e88b0d988f817 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee069b27db292acb91a9ec61bd3fdef73eee2d7623ae7c19b50d09a66759979b +size 1000661 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da2c22400f87f8f1fad869a21bcb1a83c7a452b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54ec069d666d2d2ca32c62e7b3c4bb470d7f2ce8adbbd62a4794e18bee7d753 +size 132334 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69b7d6d4595186a9a294ea80238ad9359ca4b36e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4783198ed1c1db46b970cdbaa0cbe60f20ef8e479ab7f3dc39408606d7c726 +size 181795 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..220da6a8028b3d0c8d9208fa70ee4b8a630bacaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71d1371e489fb700431c28469d5c505ece7c472462ac579dca88c59f437650e +size 27336 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..098fee7418627a5a2b5cefb690904b983ea56c7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e510aec83cfc810d8d4ebc6a8af51ea269c8c38cb5a0f7ca978d8c268594ae +size 119848 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9f7da4840a3e6040b8fecf1c795542493371e11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afb5a99b56b319a616d7ac643f2ac33fe5e0ef97807c08f389d9650aef7bca67 +size 57662 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42bff41b7df6506e9158bfb0fb38caf5ce96baff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cfe75cdc1212e6956f74206e85f8334d84c69cc49ee07eef308d18c7500f5e2 +size 25250 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da770276dc77c9a57c934b7a87dd041f445d4a96 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3844be491eedf86f6a9cca35cb3d6720507386c2135d46eeeddd6a5262df6070 +size 39181 diff --git a/eval-results/mmlu/0/ckpt_051/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_051/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a161c9b9a667c6885b68c1fd04eea685cfc2bb02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e307920c40502119f28d3d33222e03651813d1fae0671659cc87b7f0f8e601e5 +size 32685 diff --git a/eval-results/mmlu/0/ckpt_051/results.json.tar.gz b/eval-results/mmlu/0/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fc1c30c08295b413bf3ec068a893120b6ab8587 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3734263bcfd06a01112e88ea67f3172a8cef9ef2ec6d339641fa764a1dcd58a6 +size 7629 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8470be23e963d719212464fd3e964e0acdf32151 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68e49294764a812a1482708a11c4d12db63c210d664406f10565f2cb6ca62b8 +size 17004 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d48bea1c7165a2004d7a0e59297ccac1cb3f5826 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4700a93d76bcdf60d7bfb0cc278f3ef3403845820a0f22061c54a3dc32f30990 +size 29641 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06f032b90437b84538335172c21aeaa2c6c05831 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75d10ce09b32620a3a2a6b58b751a4bceb3fc5c5e2dee8744b2b6b9b14977aab +size 39603 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9a5b6c1269699a24160aa687011412696d3e03c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87938d25402e226bcefa733321ace8733139a0aa80a0857038b0013873632dbf +size 26568 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c68a560123a69f88f5a6824c05a994d0f60e94b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c728d888e2f43904c55629015762accab7441d811a98deeebc8dd27675f638af +size 60842 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..156e236097b80c73743e33cfefc50d27a979e786 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f1b87feccf1e4781fbd24443809411a5fb8325d1e9957007b4ef700e98d7eb +size 40147 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54f1ec7cf5c12a064f558155cd31e809f2fb6b9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7e74e932518d6038627eb8adfbc71670367b4f0f3cfe122c518889e0aa4801c +size 23610 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61992a337e6d6add820d93217cc32c1c41121a48 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ea2442f202310aedb4f87170315c4096ead139aa83185f0162aad09ac77a8a +size 30928 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b27d48941c8afc54be80abf413bc680b43547e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d82d746816f8e2a104a69937240af0c85773ac9acc972397cd4727f461324f +size 22891 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad7c698990cca42b23ac0ca0b1152db6447bcff7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293097f0ce19e742f34a397157507763d412e9295fce8ccd802e9931e42f8044 +size 60585 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ce3b2cd9d66cd61d336b9793f7a4a5e45ec7f2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd756e55e64f6700f6ee6b6090dedf20e9515308dbff1c622c0ce1c74a507db8 +size 25578 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78ccd2804f8c869619ae590ba09a1605a6107f89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff4e590cf548836449039dd4d95f21c7602d98eca15b5a078ced530672503cfe +size 25608 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df9a468d17eb28518c9379872c92eedebcf1181e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2cfe565f120a9ba306d607b32343f17c2e718d67b52f41512a3afa9689c192c +size 46231 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13f138e36c93683f9354a0245080e92061719fb2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d353fee86ec5da9ff76f60eb6128d03b747d265e376a7caef266925b89ed625 +size 31329 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edcff615fc7b0cf9664815a03f7daaeb3c1209d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08ebbf264f33869b6e34c4c8c9fa8e95e2e2eb230d25fcdb9189380346c8630e +size 28604 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c31622df15d1fd27095d4b30a71b9a6072a2387 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2209eedbffbccddb2097767dcfc058412187ca30dd788f7302f7ee1e68aceb2e +size 74131 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83ee6c01e29a361370a27f55b251af8d0585d197 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8e6c8662b396844e96e748edfb1dff7e97c3187b2a97229ddc8b5311b221df +size 29976 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3ed7bcf069430400411dc919e7ce664d41150d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5cf8d707c9d28fe6b67dcf391f89b1a21359dcd0dc70fb925b52ddf72011d9f +size 19008 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0af87871614e3235bc289c59b4d765f46e1a271a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec93319e44009d955dd56b6f81869c116b20a8a79b0028572c08a14d26253c8 +size 87467 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a3f0e943b8f6bc6fdba54ce908fd2247de6c9a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5507103099ae66f37eb261f534c60d2a0f78b562adcc334c9997c6c20fe18f8b +size 49659 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87cf8a0b5a7da34ee522fe645cc82a7d075453c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22f59fd03049427e77250e2b14378551e79fc4c7768945dab44f8618d56e873 +size 31394 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04f7dc73b0e88b9831674ead46ac37519a2ec144 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a6b3053d335aaff5945aecc4584e396ae8f7779f1b5e975d2a34b26a562643c +size 144861 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9bad4eab168b691bbf13009714d4b5e94c90992f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a84004754e1abed350f194f7d236000a7217dd06f7d74827c1957b2ca08ad56 +size 44285 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47d4937d3a96025aaa83a5ee502b76397683db18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9aa69e2881e736198ea167bbd70fc9d1c29dd72396dce9f9392f82950f181f4 +size 54070 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..134de5e2fd76010c81cef22b833f944870ed8cc5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e62e972953f64fe2be49ed02657b7e2f33641931a11a5d19fbbd4e674471e4 +size 92129 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf47e608136ff8a7190f9e6bf05dd3665dbca650 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0afbc7a8211d09f0f3bd658e096593c94790c7a776caab604fc96be7285e90df +size 56829 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5f2d128f958ab470fd8266b579a08ccddf097bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a671bff8de5ae525277c7ca41143be9885b9660c686addcd872749089101383b +size 58054 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a8f2a4d27517692bff8daed1d138bcd07890945 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644960393541d22c82026250e31a0b01286eced8de32bea2007053c946bc78ae +size 42679 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f17111043de048432500a4726c73af90df58cc87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c409e55e2f811fa033a8c20a759fa54ad8d63bfb64299bfe5d2d3b91c55858 +size 143102 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8890f72abdbe0688c8ff3a1f18c6200c38307514 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ca6541d8b2302418c2b052ead982cd26eab612cfa79820d8fe7986eff80131 +size 70761 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3819a909ba8cfb4e42aa000735102b511146e18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329adfa699de4024d8f3db4a4b738c6522c315b61dd187997b0c002a5f896414 +size 161401 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..383f1913d47b5cb219dd459aacfa6124764e658c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1b47462269adf60fbb9ce08045915e51a047994b5775a9c09de36a8511e0dd +size 208730 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ee2ce7110f80cee94ce1bec3a5d2c4b4e24fd76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7313b714a9f3c48e75a9bdf9cda98083f63b45f209cb56e07418ec3b3a8586e7 +size 49596 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66fb82e1ac0a6265ddd483487b28fda7534ab480 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca9f5c176d3727860be389621cbc4851b7db1032dc503aeb8a265c16d7114a03 +size 31351 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14e50602dbf2228eda4fa02c343a1502de0bfb89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dceeffcda47c851485075a06dc541e9593b0881d6112edbbdd00755499b5e9aa +size 35791 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62c50fc34ea0ac371410b840b11ffda2c55378a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c564b4e6447b7e86a111665dfc29aa43d1b9c103ecdcf192eb2ca47f14c6579 +size 29308 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c0403666bc396792c390c4c0fb73dc10aa2c9da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1251f043e0251aea13523ed6ecbbd49e5b0b8093e40bc4a21bf427ee43bcb511 +size 40040 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f536aebb1d82803af2c4c2835251e3d5e9a8a09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc19074232f9646055e750ca58e49d90aec3768db32edfee841708678d9ab071 +size 26910 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34ae194d8c6e909ea04a37a9fe74d2b20399b711 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:674f9eedf144e363d5cda696ee8633aee8670ad544d162533cc16a88c5895509 +size 21534 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d00e491c4c44ffdc4aa49a96edf277b99880d917 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eefe95ae0f283c5d2ef3c0fc0dc36959104078466f6076cd18e73607b07cd16 +size 57100 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e3a6474f129845e9ac9e1bddce9aba12a0a0f63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb13827ec9d815fd85c4cbd1d7cf5d97a1627bc45b54c578bf87a1a17e2dd2ce +size 22094 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c973f63987841a032bbbe6361449996ceec3ad7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c57ee763a66432eb710292945e6c8107bda873a7ca033eaa800dc9a862cc2f8 +size 168198 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78cdca0cf9be804ae9f931afe42760a88b8ebbb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbdd58679f8358a96274b50b0a4e8f926bb1dc08f05703151a361a3bbcc1e5d4 +size 89705 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e8093f5b4f65c6746a2cbd4ac8523a24fc22640 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d9e7bc4fec387add0699fef7f0d02931f200b55e4291d904171bcf4beeb559 +size 153587 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..562c8c48e805b5fbd5e1e566eeaf3351f5bff4a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a630e803e31212cec5ec3ad891a012147446331ff33572b7dd24a1cf594ba8ae +size 79034 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4cf5b5247f13057ae913f0d3a04dd07cd941160 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b548b315793c14acbc529cd415f484bf29d53c23b98d0bfbc287526b4081a7e7 +size 73870 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17a0733a9274846eb0f3099add7ecc22f50dc2a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc3a6db8fc683cded71e8895534364b837c5236aa83ee4c9f4b7789215cf9a57 +size 82103 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31912c779d1bdcf53e6b8106cf1213c7387994dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e3d441a0b9b988935dce77a66e874c74540ea13bbcd19d1edd58dc5cbe59f5 +size 89407 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf0b9102b7e3a3284b4667fb05a96a3b070ea433 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:593fabb8c88628d46ac8228e58f44ba895024b6e05e039056d6a9396854b75a0 +size 1000231 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6b2fb88c75e74e83e85d64c2dc6242246ab4c9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa0ec496da006a0bf953c6597ac57beeb9d3c27d3e468afbb46c6cbc098850a +size 132315 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96b1cf9f0de335be2815b3be1e693afcd8e1d959 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abbb9bfc707c13303b95ac59de68a9c0b6ee1e78d30b31ba56594a58a0d8ef8b +size 181896 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d879e42b85508b04ca8e189325dce3d07957a22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e5b01b5ad61e8c1c404ba0374d5fc4bf4d79640ecb440e948141c0b1831af7 +size 27347 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5479e5278f3ad077b15ac643cafa31acf9b63964 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275c2c497fc29c586dbec123d818b6198677fe4d0b7b5229e257f94d7658e10a +size 119883 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..345e3742018f8131911ab43aa775b2656ba81761 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124c43d4bc0991dea965750721f929249043be78b24871abbf17b61a9ee30902 +size 57695 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8da3b35f4d27c4e83585a6c772d8c815a5b6f6fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b6a01ee4b35d341e53ab1d0b5dff872989004075902cc52a58e90717967d878 +size 25258 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07642a977628cc0247b7a6a06b3e936fbb8bc767 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790e0ee0641ddf6cbbb9b569c1acfe3ab34993cca7d6f8f9bc70ee3d18a77d34 +size 39261 diff --git a/eval-results/mmlu/0/ckpt_054/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_054/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..605442a635fd49c51bf64874eff7159129fbbd81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ebb3506bdda88835f4397b107b9e845df5f453689014a036713284cdb80aec4 +size 32773 diff --git a/eval-results/mmlu/0/ckpt_054/results.json.tar.gz b/eval-results/mmlu/0/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ccca9f97aff55d56af94f98d7d286c92ff95391 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74b71239de149f73140cd976e9c732e4f8e647587b41019ffba97eaf79c552c +size 7601 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..323f40fec2e6db321f43708a9a5b333d72e6bbca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501194f31e70a0b85e95b904ab4f0a9c1060d17af660e2a6adc6ed49bf71937a +size 17016 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf6259acf12df6a7cfbbed40a8a4c03787fe2402 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882598c30f3a61d382ec93a6c639aa56a0408c3c27551118d7ea5040d586d263 +size 29637 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b6a9f4da5ada94180a3dda9e13192e6e9bb2815 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c4326db17860697188ca7dd3bb0146156e5a8816c4d52c75b717ab832cce51 +size 39654 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a9034c55ad126806bc05c6450b72cd0c0c077e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cc578a9aa031b95b156a0365474cb95b3d536678843adbc5f226f0d9b1a7f49 +size 26604 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c91e31019f0bb1c30c588b0fa57813b2ec7ecf8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678e5cc15ba55e658edf594cb51bb99ec176e3ca0bdc23a682a27a073ad2d05a +size 60923 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66f7491240392ca1ac2e95d9f6bf8b0a3fe50889 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:284faab6b7de6338923d73e9d969c924a30907df97e5767d6a981a07ce9b4e0e +size 40161 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da68a2749e38923f7f037981c4bdd4a9ac2ca14f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03565166ef5e60a00f7d92f83475136db0a3e5995b1ea4268f3fe626ba686606 +size 23713 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..249942f564adf3d90b6bc133ec247596f80bb62f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed8c8bd404132960feec2dd89693c485b2906d0fcb735ca454e4371892356935 +size 31004 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58a863a253574f745cc3c744132939b5bd5962d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d599d8828cccb65111c96af3c281262b5f109c428031555cb26a3dc5dac4c014 +size 22905 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f965d7be59cb48b1c9d95934901d452803303965 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639daf9d8004632eaf380f43c7f277b3b08424bf88c060bc490bb68fa3b2d43f +size 60670 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f698c7116b7383350c20d5442522348df6c652c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cde0ca4496f30843780e182728d47cebbc78da4594977b7b186229178e17d33 +size 25630 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f12e0868648001f520a5b58b2a902cfce41862dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc80640abb7b2308add08c5b6e54c21397bbb2cf88bb2257ce7a0c7d7b516e2 +size 25631 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4be77138fe3bec275685e8ed6eb0e2affc2098e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00374c3bea2b06faee536b1fac7f94f7cf0ab31b2639080419dacf9e3bb18965 +size 46224 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc65805e50608f8eb9bff05fb57cbcd09abb94a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebdf426ba1df00c3ab9d0f24888a28972c3c29c387a0bf760d35ebb6c0b0fd78 +size 31364 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65ca0b5747bb2b4c3fc4cf0db99019e11b0e0c41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb54fd8daca8b3c4c4150d89e627ac094ac48953d99b653f98bdcd47ebd5367 +size 28653 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fd735ed2447078c9f2199b48c94df7234bf1454 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:983949fe4d629bb1ef60f9f8d98ef95c3bada6a87a6d0459806919a5a5fb2d3c +size 74118 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5306beee10be16a788da022d1ba0d21e7c48b5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ba72582ee11408858a1bf30f8b452fd6e336560ffe0749984961af2d7b86b4 +size 29952 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..592cf1330bb6517072307e6b6db38a711cd5eb9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f93cd02246726af9088f78d95864908593ba1d1defef762193fc7ca7c8e573 +size 19044 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..532fffbb555b56e60a40bcd2860dc1e5ea0dbff8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9f8c19a28a86d6b5e4188720abb85581ae8ff7bcad764df25ab8c81f37c079 +size 87568 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa65e74ff373e54eaf28347a71a7e2f8c835a41e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23bfe42ad981c359343973715ec59cbd3a7254deaef6424ed641e69e82aab8a1 +size 49742 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5ac36d93d84e36492f8ad7dbb049695fb68b453 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fd1c54e6442fe0c5c9a666f7d301952e3a1b5e1678cdb4869841993d775cc7 +size 31442 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5602c453729300cabe78f18cd3c1a4e3acf5e3ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f1e6291e89383586467de1466b8755f12d01a347a66719bbefb864b6aec254 +size 145023 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b896f533ee96cf6159203c2e210dba56b4d02cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed563e493cfb67c0acc1c47c7e7fe473c7a829f4c051f5439642450f38957d6 +size 44341 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba5bacdaf72c438e0e1f5f80f8565aa428706ac3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d83a223bcff3605ac0086b3cca4a384d883edeb5a93c232d27e062c6cec6c5 +size 54158 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c62b402ad7ee3432dffd024ed9f0bc7148845d99 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d26bbd55d3eb13e1e8e894ad5cccc0512bfe4896958dab99794b40930901f33 +size 92204 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e128dc1d7c01d84c03558dc8c0bb680b7b065695 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58774ec45ca0473812b0730ba3fe5dc726c7f6e632ff46acd976959eb6274dd7 +size 56862 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f611accadbda3edf476032bf04f100df1a188083 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b6bc80def989d92638359f0d4c184bfb6e05aef6ff5b79998f2d15c16462c5c +size 58166 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61d4265861a91081878879ffdee3fed6d795315c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2ef7df1744d98658fb1dbc783a28b9f90c5004d40f14debd8c5787dd115717e +size 42696 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a411b588b332217fdc7798f02da3dfd3f99056fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6dd1cefd8542fa2bf391e26bf483d5edfb9ae338ce60fc510fdd5d3ecb47e7f +size 143199 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a472b35f70247bf6622e8a07b9ee08d089b4973a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49daffaa288d4443ab03a5bf77ceb8ccac8ed042db66b2c3814a66120ddfa645 +size 70840 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3c42dc6941e5c6f53334f27e6b09b1b899b6027 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c28ce6758ccd2db887f8d4d725433c9a4d9496c1e38ee5ce7254dc6713f7d7a4 +size 161600 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ed2424963453b493551df10da0bb0f048daa13a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab60d91b5de0f8cf083ffde166803ebd64136c83ba7c7df58667a2d9c46fcac7 +size 208921 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75eff0751596f9d8a16e34be09de9408fbc2b41b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d53214f914c13dbed83c7a9db52c73ef440545747aa99dfe2c8ed929b0721e2c +size 49571 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7d804035b94db35397d25767a1fd9b700fb9951 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e083ed4764a5586f19eafe22642289a37431043f325a2dc0f77d29bd50c08a2f +size 31339 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f197cff582416eb41eb9f9786634d683e6ba03cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42da7b644919233a1578206125b4d0ec513eb4d4ffcc96d5f557ca14d21b0f12 +size 35865 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..662e9117124747ad458e8f34fef30e801514b17f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1c2eeb44e0045c22a01362db933b0535a92f1e073091530190630bcc77501f +size 29295 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2704b1799a491428bc916de1bb7a12a87c90c2dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad41b181143de855db7fd6187f5dd75c08783036f7d84cbbfc087bff3242003 +size 40083 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e749836533aa8208824f6d00de4c3d7cb96ba146 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b33ac10bb1131d1dc034299f1b31cb247c3ce1bf3a827cfb67f3e001d79262ae +size 26899 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47b0f2a5648652906cc7b9b310598749d2df273e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77afe00dff08c00396f1176972d9715f3a23b046797ce90179c3ae08277d04f8 +size 21578 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12b56928d6ee224d76f79000f0089cc438e3dd69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9302361f8699fdcaad94e70a978928b7a47d68be2b19f7987e8a4fb9aa88ad44 +size 57196 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07ab674e025e359ac1d44f98ba541d1ea1d65963 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4654d50b8e9f0e484802bf52d8924def8a994e47742dad5df0edb39b8c91af37 +size 22172 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d57a737a5ef9e1bb3833712d9701d4390ff08d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87c1af159996d8ee0bdf339e75849d45ef85d7ea75de42d8800e8c56feac79a7 +size 168413 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b0cbf6c76ad0cc612d80ce2809f83f53f375c20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da16236723d5380954068aabb92d73163e909e68e6b3704bbb12b92f89d250f +size 89761 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41036db3344c2429b6da30bef30762e1f3f1c225 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:691eedaf7b5bf4bb43f511b8a399cbc7455ea97a221cf4a6d10f11d4888e50be +size 153397 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bac5a8a290b2daa654dada85889edd1e185c279a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aa69e2af2d0d44a22e6d9d6e54dac6069bbecf53139c49b448695e5d995f57f +size 79081 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a93be059e4248652787885bdd83c8d5b1889e3b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf87b54cec42b3eaa0cf2e36b9dd4d996d480d5c99842493eeed6116049999d7 +size 73948 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2f0427adb3e5ca4e43c53625140d21c045b8305 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908dde47a6b5edec537860c9882380a69c9c3fc242f194adc334582263d09d20 +size 82129 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86ff05ca46c60509888079fd92cc957f9b80cb07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a201bf9ebb2100b9c18e1154b8e4c50d1f459b56d036eaca296f4f1f19d57124 +size 89618 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0130623ee76ee1c93e2e5bacde8a61bc7b151263 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39a2eee89f668a5e89784f1cba70dfd66cab236986bdede4ebcb4bc92c4fc98 +size 1002018 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4880fc065ccb4fbe1aa9d2a941697571ff82ee8d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a4e594f9f6659554d0b62c5ddd112923a706134159ae08b57cbddf5d0ce610 +size 132604 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c50976303dde4a0bdb3261adb51e340b9d7c502e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86134269ade5fab138caed9a6dafec7f0c6fc24fab5185e80b6f438307925287 +size 182175 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8a191981f8f7e555e4d9be8dda3455a880e2507 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f063d75a0e20b003684cc6f72d613f877199d6107ef08be8459211a5dd03c7b5 +size 27363 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b69d80c5fa6ae01835415372de534d3b35c2b4d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ccb5dc6b83685287ede0dbd464e0c5f98bfa92a5ac9ba51e5a102451123aee3 +size 119956 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4301bcd5a331473ae84047451a5c27be5b48bc6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf80abfa3aa03cbb4fa4bf65cdafa5d861a9ec29a44af9a99dab5afc3a3ce4f4 +size 57763 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a6a9ec3544245e24f70ac9656d090fbbdb6a360 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff0614245d8a4ff0c8ec090ba07f166dfaa878210c3f39c698e88608d9de115 +size 25277 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45953ffe9c6e0cdd17f101cc713834766d883421 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9a3bb146c0c86d1f5a6d243d00602ab69be81bf17cdb1b66605dc7a948615b +size 39245 diff --git a/eval-results/mmlu/0/ckpt_057/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_057/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e8f23153469d7793b88f1d12537f981b5684bbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2bf124574a9ebdf53f69774b0f63e4abb9db168df192298fe123b092c329f8e +size 32767 diff --git a/eval-results/mmlu/0/ckpt_057/results.json.tar.gz b/eval-results/mmlu/0/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..879cd33aa4eb19f6749aef94c90cfed3d2bb125d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba2532896015b01cc9e911a8c5deaeb0d2de58011e87d88c6103960d591cda9 +size 7579 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f2471807f33b072a28b36b19c0c8e26f23dca8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c0f6e002cf733a20e274f49c46493629c5628fc70441593e320e684ac064a4 +size 16987 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cd73eed50142511627f817077f6484f377fde96 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2cef5f3c8be4370ff64cf713d58e6c0b08ad8e30ad5bc0287135e28db414594 +size 29658 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19d9dd5988decd745351ed6395fc562108d1a4a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41bacf456e9ca7b7d1bab500a397c7981fbc8fe429ef7d8087cae5fd33924cc0 +size 39641 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49ed115f6a351303a53de0206b362c56373d499b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7360d1e20368fb991561e428be24819e4582c9b9807bfd761ea407aad6b6f931 +size 26571 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7f18266049bc1e228995da6c11eb47d58824f79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab354a45d916117d9ae17fbd663dbc10e3ccedcaf0542bec7cc67a51adcdc5ef +size 60854 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5c8d85dbd818a660e2c5801232f0a98dc183b1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6418fdb34c3343304f3ce5a3244fc61526a0abf2304f4ace3a5df21b842d5e +size 40140 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6ca0e47383707c3245b7d9757c64f2a7245a2b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b831a20fb11d48a67b7b46b349e577d669dad12bed1f29b55500de444f597b5 +size 23666 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a91e069d3a8e802c9334aa84e3b8b8ff9e4dfb9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a8eef9cf4d8cad3a645be543cbfadeea3c0e01aecf61a80cf2e96aeb794398 +size 30965 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b52d9a7441ce66eb57d8959cc1553069b20b254 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b13029ddd9d7791f28d3bd4b02c8ac4e44184bbd085ad916168e4f98ed3475d +size 22899 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23ff7a09144596d4466df828333a683dbcc0050d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bf646421326760c3d8d8dd15d84d0d0460553fc9e1406b93eb596ab19ce9343 +size 60638 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f11d6544d2b7f23025632a16e7eede97988ec1ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4815fdc7f3691a0e40b2d437e6a4604a5609c549b3471bf80cc3fa034d488d13 +size 25596 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..005c70861701f8aa25cf7341ee95b98e9be7021d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b0db81fc8129da2b6a9ddebe86d15c629e6dd5ed033b99df60a8ca320eefb8 +size 25619 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebe67c3acb9253296e1dc5558470a2a0d0aa114f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abbd95e596ffbfd89797acf7ef0caa741fc4d0c2d64ee21b0ebc0020b93e950b +size 46246 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d100c2e2c272f8f18c7b757ef212fc18500b335e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5daf7f60b1622ee84c0d1ed4cc64ddc34e647d934db4fb9aaa9425166d967c3b +size 31353 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3aa33f2eaedf2640b7b935ed06c3730d63db0f01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee5ad7bc204c660861e0bc6f5b6a66029ebfbb4c2749928c6b7fd89db1199c8 +size 28660 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8080147c9e697a76be3e20ab98ce04f9b81253d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9a4ecbc31daea8a802fbc4d3b965fd01a51ae0c2ec151e9bdded2efb4cb1ee +size 74165 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb4c4ca1755a66cc2fb6c7a6583daf86484c8e93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6caadaf15d4e635b6ec367759a69c7112fc5baa51cc2d1b2f90b7a5911cc771c +size 29926 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95badda26843524c0b05050782499bddea97499b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8d0908c58de275b0b4cbcb20d351db7c38ee72961dcf034ae3a73ebe845b49 +size 19007 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a358bf3fa2632f244087f0a8c1e779dbb54b582 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ceac0fba0105b7a446602403d32df8000cf4ccfbccd722801ff88dfe0ef707 +size 87554 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4d3b4e6d9b5b61fac2d7c8d693ffc37f79289c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec77f3efbfcd971501f11f6b0abe527b049327c603c9c662cf0061afa907ec3 +size 49671 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4637730205a1278db845d2b01b728d439cd6e93d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99064fd951eb962e4bf783e39a7ceb63879c17bad554184d7f5dbd019f16b67c +size 31456 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6d13bdd372ece522aaa1380f2081b6875fbcf32 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fdb358f82118f832a340ce83ae56d4e9cf4b519478cc8d82b724373e46624f5 +size 144851 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f65008fb6a442be5e538e7a66a41d52e6182f4f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da1514c00cde7e57d48e7de9e42dc5bcc53a0d3645cfa3666fbd33aad6aa978 +size 44291 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2596dc2cf21bdc796c8590031d8bfca295fa26c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eee8e2c73d0e36b15a3a1176ef3b26ee8ba0baded8efabfa3665f8a716f14cc +size 54144 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d99547fb73de53b268576e4a85ea014daef455c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:838b8266981e1958027ec73f204f64901d3cd3b90be316a252fd66eca2af5752 +size 92153 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d631ed4f237dd0d5a5073764428e432f78f151ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd0d0ac27a13e1365c5e5ae26c47c44c974358ab801f6f2d91282ed95c9f27ed +size 56926 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11d6f19fb94051f6307b9a3382d4d60a76a7677b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12da17dff311335ad0789acc0606dbacea1c67c0bc296da19808c04535e7b17c +size 58043 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db102cdd767161a51e1f677d8e45d3541f627b0a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef41fb7c0acde24c3a73a6d409927158cdb39768c625416079490e27fb8b6b99 +size 42728 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c95ba7e42e8169ce6f0a210956075429fe4d66ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c71f06d32fd69131c200ceae1c1380baf391c301ed4d320a240f1cf88ef4e6 +size 143175 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f10bce321c528114c5b21c7d7c7818f52ebf81a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640efefb40a8c3efacd6b2649f7501015e2ff6c9f8fbd10c38485dfca894586f +size 70803 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..494e63dcbd2faf20d43e9a3c83f004b18103a3cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffcc98ddaf53d937d642f46b3dbd3ad9f18cde51cbd551f26bbaddb1f98b3ae7 +size 161441 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74607852a7ec77aca7c6c8e15f9a5174bcc3326c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adca09acb61729d4c74fdd35563c6dad0b318857c20ed64f8c8cbc1f1c6b2c0c +size 208946 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcb7f0093837512b8b828607062f86b40b4aae95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7bdb97592a4669be467f01c34889acc0b569a916148db55eba8a68c25186b6b +size 49620 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0898e48364bb55f0e94f416f5288afb0f26433ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0dafbd23bbd967ea86fdc269d1da8da8ea807b32a14b9331f891ce88d825764 +size 31327 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31a0ccf4c4b0d9ceaef039ddffaa77be91e00a81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94bd7f6934e11736cefe9f770a4da6c777dd79fe11309fd8d0beca7c46619b9a +size 35836 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b224bd1a853580967ddeb616890eab3dfb28abd5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9ca2d53f4452ff9b042547f97924685ce83dd7b74fe389e4a8bd6af2e6057e +size 29294 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09d63aa4b205d9e3936b461644583284550ca374 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ffa1d4247bb618db4d848c615d1467d6eb64be5400b6168a3d7bba85bc6c27 +size 40110 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..220bf9ee3511b195144c79b25d86048186616c85 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ffee4ec96bcfb09f373052357120ffaf8bd44fcf0aadce318bbf8441de78d3a +size 26922 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a3952bf4e86dc5c5b0fcf205151fddaf479912f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68febe4065a38635db57e2fabcf9c32b0010b002d72023134a3ee1066905d152 +size 21568 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8134685e8019d931d10d967f1b3c1e62b9514e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a317f84c072ef0b9b430de9bb27cf3c04bd1d249b3c713b59d081382d5d0091 +size 57207 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ba83039e9ff8cfdf3ebd9882e6f52cefec1f149 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:619b132728fefbdae6f5bef393cc38fc81dd735ee296aca73cf7b10c725f41b2 +size 22134 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adf5892a1533fc961b9273ab1ea759ba10c6adac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c838cc9e89539677255ba7a9d98a7c0e65f5f8006a91ed0a66721bef5974fd7b +size 168386 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..788119a2ff2a807d0aea8f27cadb533b2f696ba6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082561316451ddf2bcd9bf074ce11fa00f0b5670aa9d9e5aae72a15ff38cc317 +size 89772 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8594f236a537c75e0c5b9fbcd8efd59ad9f6f9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3961a25cae276ad27e6589f5fe810e2b9dc33b4f08a5e0ed0db56d29c047fa36 +size 153569 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa3e9784b275b82a65d57a1c82b9860c6d1c3a4d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c9e8124b4cb870d86a66b16ae3445815d54fac8081cf946b8aa26f040340b09 +size 79015 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..756fddff31de13f3bc53ffb11d5a30182e4d2996 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0df1a93409a667540b3f475803506ab12804815e7986f46c4e9f3cf8547ba5 +size 73908 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d714064871b4e88d5610101ac3600a5c62ad956e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95b74d8fda82f365db573c5b6032a2462b6513e755f2fb43de02ccff9e5b799 +size 82181 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e21581f918aeda054bbf10e009936afaa79d5302 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72af5ec12eca7aba1acbe555736dd2b9e47d90d24be0a6ab7b7504ceba0e51f6 +size 89425 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74b2eb5e9752b3cff5394fba611d22b80a772258 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eedb41ff81cc2918e1f0583c3051cc4893a68e61a99faf030b11f473e663eee6 +size 1001513 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a1a8e54a6466e58e447f19abbb149df12eae30b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acda1d4e1010df5f3285ab6e7b0986c274dfbc3496300a8a071dc6031327c3a2 +size 132488 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb2d4e24eaad5895261b7f75bbe63993bcea0ffb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b861701ca70f7212e3b9c77e9cd794b28af7893eed036b7e83ed028563d4f15 +size 181918 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ba3827175e03477cb0b5ac055ad404980a73047 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e430254c0fe44a3004f041b816296c4d76139e99071a892343ebd3928a6253fd +size 27335 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49465a6ee25c1bbf5b56d613bfad24d7fe208f57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70b50c9519c419c3112f142462edce6e3e539e37ea87f190f2ee8a0528e0e5f +size 119957 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff3dedb0ffd525ccc4bf71e967c24337dc4c5883 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9be7d03f65954c074a0fca79b6762b97d6280a8407715123af4b8dad2ebd3b +size 57744 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..399b0e388a2b8432d6b0e0605ff725bb9f64b6c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01a965f9790e088c54104de3d7a195820c86e84968a06fcf5f4966854de456b +size 25340 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93f1e8da8632697eb591812793d37c2119038fab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16dc3b244accff8128d7080ca55e622fb7af526402b75614aa3be5ec7ba1c93b +size 39257 diff --git a/eval-results/mmlu/0/ckpt_060/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_060/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d544deb0c9dcfd0b0350e9cf5f56aacf8589aae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b302dacf96bed7d7fc1fc9af38e99a7098d7f7b86eabe2390a15d0185f4ba16 +size 32809 diff --git a/eval-results/mmlu/0/ckpt_060/results.json.tar.gz b/eval-results/mmlu/0/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..457ba8164443adbf9c3c4f1de45c271b977adf64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14727d57ff47eb702d2a6bf9b929c3b081cc936388d1a513f8bd7c544f0eed38 +size 7596 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17b3df4c8085da01c99f0b64368bc1df08146ae5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d99539fdd45cc8bfb17e89802b0c52c7f7a426a183dce917b44174e17a45ac0 +size 16977 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3aac062ff35eab106ca7be7a91faf8afcca82d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057eb570dde312e4df61af5ca4e09c2c3c49001cf92e0f9a9187106321b680d2 +size 29642 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d39a6e503eaec8231ea0aa1050c794abec466f83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f637bc082c30ae095d2a35dd2eb2740070b9f15e7bf9228f59f37bfaf81bf656 +size 39615 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4320d04a71fa4e2c6b2d296a7c02fac3e2d06477 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d8dbad46a068f0d4977f21cf0cc7c638279bdc0048a2bf022666154ff31c270 +size 26555 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4986609722ed1fb638ad88d6ff62702f6bc00e3c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81adca4586a1d3b936d5004d7a19532eac2c3c9a34cf632a2ccf8686002ec76 +size 60920 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd82f92766dda161cbf5b4aa375ec1416fd6cff5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557d7330b5c32ba4d94224bc78ca169cacca821d19d50524ee66c47ad27959e2 +size 40141 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b8231f03d9ed60694fc419a4f2bb7e3259d900d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d47c8ec86ddcd73393942c954b02d89149881f7a8545b0d4b74ded3a71377b66 +size 23651 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7d930f9315f1311e62cd1fd7ab70303cc3db53d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30b0a049f21e5504fff29d0691a37d1affb2df08d3d7e31c8c575b5ac49522c8 +size 30939 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f744c62603496695f9ba6d943bbbdbcc2901c8a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2bfb1052b95fd8109bfd2f44caef7111387a7c942c56dec82c0e53d3d10bb5a +size 22845 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35b40567d2e29a11a557975435fb7d37d7ec9391 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33cfe9c3445f474704ac8eddbe87b18c8166328e1fc82b68e61f34192e4f12c4 +size 60599 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19d60bda7b124ecc9daab33a0eabc897e75ec627 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd68f052601750a9be3114ecd492922f7e2a833e8940c5498f42b7e78f721fd +size 25613 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28f2a61f7279e72ecf11714ad9296d0d007524ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db04ae2e76a1a30909918d862a826a1c2dc06a90c18cd65446b2a1c6600a8b90 +size 25632 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1a6039ca94da2d02d73655edf38c75b4f5c7534 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b88d09d4354bfbd677fc1e014a624235ceddf1628222e68d5e6592772116056 +size 46157 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8764d1c56a54f4f350956b41b7db48a11e4cf9b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0952923c30bf4a59168c86d906603c857320726716ddd41c8f77668b1724e5b0 +size 31353 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56d1d8902f488114ef386c43e6f6943ac1087258 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f006b3200bf07332bb6832739e8304a2ef940b131df1612433872dad606e83e +size 28629 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..734c861c03f59fabb1c1537d76b0bed253971c30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c289ee163621c8980e50903c081923634375c47c140f54234a499d76205c3342 +size 74092 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..873134f3d85dbe1b80091c6063dcb35c06c8a164 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2861988354edc8c985f28054460cafd8f9a103e6290d1c7ae8d976aeef88c905 +size 29921 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df0382ad4eae8f5c7f2ca5caf39cff59d8ebcc46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05f597c13663f12ca04c767ea1de836830f8211453b7f7aae27e7487ae7e3f94 +size 19006 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..591c23113361a31f9ec2e2c5a3a6388030e8babe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d44bbc5c36c61e1fbfe915fd44319ea6e1766cb1388e2a3cfd7c8cce596682c +size 87500 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..458aef25f91f7ad205affc959fb988d81abaf93b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca9262012b24b89d6d53f8ec2f2633d2dd5e21fb9413739dd942252835f8fdb7 +size 49678 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4ea2f10e4ded28a7cb43e0f78e9fd579bd75a76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17759d5be2c9c20b5ad42e7360937d8d01617f80efabf6b633ef7545a1c44d34 +size 31424 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0535f0626201df69a2fa134e32c30a4b9599caef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c56cf4b267c00c977057603ac71b0a9bdf5e79b8f8631cdccb8116c3079536a9 +size 144841 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c51bb42916365814086ef6d7245c3798a3beb57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f71c5f10309f0301f4444cc9ca4fa3021bf1d376473ddb1989e6dffcc4b3ff76 +size 44343 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef279b2d5fb717f572f16b53c5144b26be41c938 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f285c7fe38bd0f3b9588b2ad37292e286354ca568a6d4b5edd1d00b9ff7abf +size 54116 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07e3a0827475f087c6fef491522eec428564deff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cb385159454a36621e13b20915adcc6752e0007568eb824bc806d2e3622875 +size 92090 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e207c79b336515ddaff3ee5a0439c90082958c32 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4a6ef847eeea46d66c8c4e39539c10a501c2a1545c56d02784b5caaab199ca +size 56843 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ce17bbcfa2c87f2638e56538107a0c25005cb91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8598c6d62dc2223ffc7cd5b285e47882f0aba4d1fd56acaa8f69499596d5625 +size 58043 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8671163055d7b762159ff34003b13b71bdf31ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e7e8bf6e674277f9164357e693c8b0453bd4641db56d74abac74b6c3f4ff6b +size 42676 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7e84c178383957bcf7816dba5ca92e7c3ac5319 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ce60369db2cea51204be60b10285ec87e910eaded60d9c09e943548d07110c +size 143229 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c5f14ef7fa16eb9a45070adf09c2a328996a378 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb847159c945f742cf3347845d42ed9c642c62628f19e39b39ac826d9f21cd6b +size 70795 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94ba644f906638cd5c50709ad901fe7ed0899021 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262ac303dc0f78743c9b5ce86e4e04bfb2aa81601bf9d35f1fbdded67670fca2 +size 161457 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d415905c9cdae239edab219a05d044188bc83af8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ba621b303510cf1b900440eed12bd41f1c595fcc05d2915bac48a3a79fc5ae +size 208791 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08709659412dd48d47538925660197e218546d8c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c4a9bf5f2796183613f571d1dce8e80ee19749279830d701ead3a4d7a6174c +size 49589 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1d3a05754d59427031144e33c1dd76dcaa9f12b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8865ffc4ff773c8f607fad5bf7985a2e97a5dbf5a77da524518746a95d2ca428 +size 31335 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca860a2c400ebfb7ff120a4d6011b080bfb06d50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a2d6e5a2b201187c3d590f55851a3e061ce0c1120b2b2a0c9266f58725bd49 +size 35825 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c82e774eec794667d53d5081b54b91c3c8ff4a00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68bd9c41e1ac174a804f2190e8f6e1f969c5aa72c2d3377230c9ab85a1d9b54e +size 29290 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a411a9a3c326dbb13fd76ba38fff3e1c655f409 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:630b2c9d42a454659bf8794bc8151b7932e875354e9f34a4e45f69db5be9c091 +size 40020 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3cd9ff979a31feafc0b8fbfdac862599b669a79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:692012c7953c7dea199e0cbc053ca75e2feefaf5a99e614349f944fc5ff1e139 +size 26917 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea73ab036415311ce73b4df6bd6e938c43144ef4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bcd99cd18d6a5a3085ffe3c4b15434bbf70a09d8eec7e43c97e2b4ef5821847 +size 21584 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6fcad1231d5148b8c8ae64d15d48ef9a8c8d641 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d5dc0f2067a8e3fcacf1a7bef61b18fef716e3d763e680e76048ba47d742b2 +size 57142 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc90b0ecef3efba16adeb5bbff04322542533f8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0146ca60966fc1ad14766b1015d40c5ac0446db091d29c71c4c7c4d2bba4bf +size 22128 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05dbc2dad010d4dd3888c618a3769850a3692434 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5aa83d2ab714759012d2e41d12ded9a04c422d738b85bc81f2b688e7ac292e3 +size 168344 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..397c4cab97e132e844a027df3026c80197da36d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb059802f42f7a6bfccce3bc3a65578185116958650b4b40ca07f64375583bc +size 89680 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d4b57b01444fab23692d24a33004d221b23a3bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2fa97f8307e8dbb4b0dbbd1ac4d9413cc5f068b200a86920947c03598c0af7 +size 153216 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a7dccee9179ff22adbcf65b1ce294327e340be2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd83fd229b5f6cb3573b6206fb39a14733ff63e4e71d1aea278813585bf8278a +size 78968 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b815acb06748d346709a940427ed200a2533b181 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3777ea97d2ee4df387b176e521fec7b8fff1639c5e2c4fe97603117508bff46 +size 73942 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4532660c46032f0777cbcb8594703d088855b21 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f7f8f64cfe9fb2a41686a19784b827eb6c9b735f4e2828bce45d4e7fe773ba +size 82065 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c90b972436e956aaa964fcb3d71a33ef05756629 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2e5222d2c5e1112351df19587ad60eafb24fc40aa8fc2c630f9b9511fe28dd +size 89511 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8646517016cf43a3badf6cb72b904f12d81fc52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26bb6cab4b2520dae54f7c70203a5130fb052ed4b31efa203525a8cc41946a4e +size 1001074 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74a665c3fffa41b2af497606107112cc560d4519 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab3408f1e8f72e8d9605ddf5efd060c28b2360068081a45a1cdff7c9bdc5920 +size 132532 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47e8626d361a4548ae3ac553c7b013b94e3011df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea31cceba0a3f3277624cf5ff21fa60d2d84a59ef3da5e1d375962110b2e098 +size 182063 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20dab08f489087c6ff84463e9dd0ed245078c365 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651cdb722dbfb495a0984e7f47aaf01946fc70b5a322ec473ada119723ee071a +size 27369 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..747d38f2c7aac45aebae902575a1448cb0d5a556 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbce2c333a8cc251bba5f35625125d97e59ea1bbebaa4535724f66b95ce2de75 +size 119813 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..509a463cf11a421e1de83a8e7d4403e7e6167001 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c3e8d1da13e704e563a5c190d3a5dd6fa6eeb1eaf2a74d88cdf935c8c065b1 +size 57688 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5867f7241798a0a26277591d65bcac75a1aebff6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305ddab5b36d2e9a0a21059bab40ecdc43b44e5ca8d10a0b518392b8a77071f3 +size 25277 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68c6a6197f0e2501130ce988dc93192aed4b79a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fbd4d9e00f4b74707aa78762e752b3421db264b0cc20fccd7c092628ad4fbbe +size 39275 diff --git a/eval-results/mmlu/0/ckpt_063/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_063/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2898f87d322c519dec0852d1493775d5eaf79385 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94912a2198f7745003c129fd4e5a2f4ceaf84b4696cdf16cf1997e7e7241ce1b +size 32740 diff --git a/eval-results/mmlu/0/ckpt_063/results.json.tar.gz b/eval-results/mmlu/0/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d274c5386c195b341bb355639bd969850ef13499 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:472d31a6074ed2fda811e19e809ade6c3f4eb03aeffdfafa240c00122fe045bf +size 7611 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f62505c8ab19e73ff1b4055716f25df8c8f709f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a14260a02eceeeaace2cb81cff311e8147c865faa75f850cabee17a3d2f88f +size 17017 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2509d4102d5da3b58c3171f15530143f92c8954d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a9345112fc4aaceccc1cba8e0e41fa6d362146911fa73b9288c10052049218 +size 29699 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41af8615104642dd49de4b1d05946234f932a5bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e819c093d908df627adf382e4eb7ee60388b42f045ddd729048995d4cb17f7 +size 39729 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65739f8c4952f8fd56bcea838279ff218ea73a8a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc25db6fa9f7e61212bfa7fe8b1874824f725a689d0c5155053a1806423e88e +size 26638 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60e9c5856b640c356113afb6450e4b9aeff97362 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa470433f6a7ccff498c1a946f6886d934b1807adffc9ddda7cd4159c441de91 +size 60970 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acbe1a956535bdb316de8e03683fac11c0bb356d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f82d90a957a2fd74273a59eee348d49e888aef8c68b0ee0a2f3c6a67f206497 +size 40281 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e49be70737c1ffc797be7d62f14114e69c3f5f40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f69f401c8cf69b48ac9de78212b92f7504947a64252bf13d3e2b7c3c02fd80 +size 23671 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1eb34498a9a4711225bf2c2499bdfb84201cf9af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8622eb78657355593906d82180c352d7559e3cd2b76f3411e7f39ad098602507 +size 31048 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a7341b5a098513068d86cd2a7e2a978fb1cde20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494319c74863d7855b3619bc0fc628ab5b701dcaf7f08fdcfd6b7763668ffbe5 +size 22894 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec31e93f912d3e826f660457fab1e58f9b66563a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d0b2eb1ac2d1847614344bcb8e79d46ec586ffcfc08d9fd6e2224da4356339 +size 60704 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e09ef9b36291242a256a0e45a18f395c92663db8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46414819913b09a79ff6ec0eb6c3047e5cf3e6097dbe9394e5ca4a5d9c456296 +size 25627 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f6873865c4be3a8ef0adef548bf03c1988854d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad6fdb6e26d336831152baf000037fe12ec4fb3f73dca6c886fc1d1366e3d73 +size 25672 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a5a4db239411ea93a016ed0c02bef289dbc37ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27fde24dc8e87dbece8fb6cc1137abb42bf69f8f59926392119cc57b886a64fe +size 46238 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cd2245ce7b1d6f1c5526cea596cbdca7b6b910e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d454af852f19a56a472582db2096b5fbaac5dd6b6ae6568753e9eab00860ea +size 31394 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb8106a93e9dc09d7ff5e950d2d62102835e373a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f450717b686628f9673bc9a48f2788bd8f70aba67993ff5be5ca5af27199af +size 28673 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6846f3eedede498bcd7cc2484d991696c866520c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69032f4299189f6e9be6a335746899f25547975c2b1fe43afd62cf39070a15ac +size 74145 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5527f8817b380a12d76f08e35ded7fd06a918a59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ddda529ccfc834b438026811ee56dc0c1e4d6c8412fa9465b9df345780cfd99 +size 29966 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7f7ad77047918e2dfa8c28cb6884b83e92c02df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23b0906a358402db8cb3a99fc6844d94cf0dfecd741f95f5b7179d425470a3f +size 19035 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7dfafe983f0afdf1ff1c3fd3d3ec04f1b647892d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:773e0349a873e35f4f32eff7dbf44cf7b0c460d8362cfd99bb591f637348ad95 +size 87746 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd7f882c9820681b071b2e168866a0874789d90a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:595953d6e6a9ebb2d9c1faa2c86cb8b9e70f4d1f9c655e5012aa6377b7065789 +size 49730 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2c48aaf57ab3f2f1c645edbcddaa1ae632184e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c210a5f41e1634cb84eb651989a317c30bf400af8286a1f2778b70b236f1bf +size 31458 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55d48825578868e7fdb6fc2f14a23d04556cf2ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8005da0aaeb8552acd7578a01238a340de89e53757bf3c3ec89d78cc4e21795 +size 145081 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b62baf2fb89d02b174703e4ea30e84ed5b427462 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe9907f84390f2a024fea07dcc9dda7ab6d8db9b03ca489e28ced6ecd211955 +size 44403 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec26d21973fc264e68c3758951f216e538af7368 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215f65f2614006d0ad6c1e7464afb0931f0a1451e89d4edd8154e5b865537927 +size 54258 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bc291c20a4947c69fdcceaad82e1de0b0bebcd6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b63fe0b1c8a7d5ee5b3724dcfaf59a90daeb201c8850f771cc2bc51c1d949b4 +size 92295 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..232f4fef9d84e0173abaa2e426e33d179593ae6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84205d699d5e5e894d02f7fcaba0b88ebe5feed7ce63f3356d0fb814b18a1ca6 +size 56890 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56640d93a381a9f4790ee3bd7a1623e0b4f42886 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7116928f01bb69faf9220adc635aa652ba4ad363ef0c62354c6d5d2e1d8a574 +size 58195 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c92ad0d9572111fc35c31fe71d832549fc4ad83f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89e64920289f5c3fc3d32d91dddf17f71a7e38a97fef74f081297135c1cb8ee +size 42735 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..398fe279184e129411dcae5246bd2f12b3077b42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5beb895092489f10d3bbb6eb5399ce1e241a10783c07cdf7a46a719abc70c72 +size 143403 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eb94e9b28a6cad8414708971973360f317826b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107050a83df78137f2783e0aa0d160a31ad7ea49b3a6a1506e44d7eb7334e256 +size 70931 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b69f86e3e90526c651105676c0bfa630ec289fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a3143517c7abd05491f127f3658ece34805fe8c58a95c881d97aa5d2cca4df8 +size 161637 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e64138e559e4afe6c14ae3eb5e52d475f8426b51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5bd859755696ff08c0fd06eeb53af14f0f372eb67c063b36bb67f20b09b866 +size 209142 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32d3ab0215962497d09906184052edc43d911720 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea32fcf9e491b0359eb301130214281b3ac9ee621c0141587e7d03138c8bd7e +size 49684 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c728b18838a1ec828a33803df0fe45435c7e631c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eedbed33bf4de66941690a98d3e4b8a182619e9ce2fb7eadc009ab4d9ea2a190 +size 31390 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a6dc4e70f9d58599e5fb2c8047f97023bc5b824 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4decc37ef08c2eb9a239557367e5161d49fbfc0d2340de51edc408d0992837fa +size 35929 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..697ae7bbaaee000010a583d00b80de263557656b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1103c58beaa6be295444a319fbc6f6bbab4a5f1460c1c05c2d710888456411c +size 29372 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2b8becd4524111ad0be19602ad1fab1028491c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47743ffbc1ae824d2a90f5bebe3dc60fe0049f93a27259b8ac57806f1fc02b9 +size 40205 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b95b197a980e8d8a26a9c067b6e1effdbc57fe2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c2d7b2636e530b7b82886c0b3f1a2208adcd9215db689fca618b4f1f73366d +size 26903 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99e4306f3ff3f920f14613e09eba1fe0c6861a76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf1599f2de35b4e4ecc26dfb6e9e6740fdd0314b61d1725b86bcb3a66d6762f +size 21638 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2931a2df7a5fa99d57326171b58bd51404ecf7f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ec2b480c99aecd85869b5004e916092ce9b0dbde8a259b91e5532724310d13 +size 57238 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a3301c3e95fb6c8e381dd89cb4174b1588aaf38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a675f9368b95b53659c75f4e4763bd124d5616b88f2cbe103b4fb3e9f234f7cc +size 22194 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f61f762669457135dce1f287b38b5ceb314d8fe5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e539ed03922ff76560662042be022f1d295d996b5975ebae2feb4921538314 +size 168749 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53d2abdfe1053c2424a5b6b2e6a7377764e0dd0a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca4232de7378878b06d13a0ba79d001d395014ed1c2949f210c0e9f48308a15 +size 89819 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06b7e4bc2715d6f35d8f415f1da0915f460d6c66 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1691b82dab9382dab2feabc34c31d694ad98c4460ac05c889b6f52b73b79d1a +size 153413 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbefc734ae267d3a4bbf5acfbd35cc4fd7c0360d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed2833f95d92f46584b79c14b1d3b89f7f4911aee8277aba41466a41cdefe4a +size 79235 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..573a4a84aacf984e4a57b0fdd40acd4d4fe1c6d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9635e0e4ddf2648c8b4c690bbd502521937fb4b1e712e3a69001b650d9e46a09 +size 74047 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29a1297b4bd7a37df782db2004911791a04d156f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a7930dbf98a5f42f5c134da69323579ff3f0c601be921c34bc9bac8f3eddfc +size 82272 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79f898307c91a9b85ee7a797110a7f924919022e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75be74c125a0d4aa0b4c7bed6b6dd2f43b546b25749eb91ec831551e39c5795 +size 89589 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c84146b1019514b669e7f7b032636f9a5684b6d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe7b1eebfa19547b95c8a65b82c76de59d39c064accb4f56454523492f09ac1 +size 1002018 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4363e47b3ff36911412fbf9f14d32a7120f0d362 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9cc0a35541504c516af6d442743b413c7c1df4cbb13103a1b1974b78ac8241e +size 132719 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81da8737e915c1a62a316e6d9b0b63fa8f0c3d3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade6f947edbb83267560b6b2c4b5dde27014f9d0d04a6b59f5d7adebfb763eff +size 182396 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..120e3c1d28aab89248f40506f5de4a487f6cde43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87ea533f6c26acf49dce278bfd57ed6662e823e78c40c247c46ff59d1d96b06 +size 27439 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a908fcf543f5fe09e75c1a88aae05ee7523d2b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad07e0350f80fcc0ea8f12c8d6e7d6ab6cb8ffa9e18c064e11ae0440e7a02eb8 +size 120082 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34ad5396443f91d2eebae24c0ba3b02cfedd06ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461583149cb1f410e7cbc54d36332fc79dbbc4b8f4f91322da1ef03a9ccbadcf +size 57834 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9f7e1acca1e825e4e5997912e82642eb6b3665e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdada72453e514a4ab1b52343de623a9417be84e01e48a425406a57af3c40a76 +size 25327 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf47d9d533f29f53418884468c629676660b642d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e0f3db873262a493c9bd2cc4bc67e4ff5291f507affe9f4b5812e81c6adf1c +size 39349 diff --git a/eval-results/mmlu/0/ckpt_066/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_066/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8729678f1528ab84bacb01e0e8c2f53aad9d4ae9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ab22c1582cddbd4f939399702d6f780b2b35f705ff404ef8b9ab5592c5c304 +size 32890 diff --git a/eval-results/mmlu/0/ckpt_066/results.json.tar.gz b/eval-results/mmlu/0/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f264b7ef168b38282c0108e1937e55a71868960e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3677b57a723b61ffcfcef95ff32d49e220a31e854dd1c09a39eb0ed373aa8dd +size 7606 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4df6d74fc43c7ca90ed3c621756caff0f2f9f38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b1babef64cf1ba761e3a160a4f4021ad5859ca87029f0e8a62871b49061b04 +size 17025 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f875d8281f79b367150e827579d0a6d3c54caabd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12fd30bd10c58c8034e10ef8937c0eca68185d27960ab24fdc92f8f10b08539 +size 29663 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d194d6efc3a1bec239168b2a58961fff5f12663e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22df924706f4eee14e37622e4d85d56500f9e391c6a4094e47ada4a6b4f90bc5 +size 39676 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df696de38f917a2fd1b54beae59c19b02d2631ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ddb7c4d03d670b5b29a2df32e3e5bb2b6597a6a7cb005ead2c8049e84a5e635 +size 26598 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..effa37f7880fcfc3b0f43c6e444d9470a7605e2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a24e7e2c8ad38db4dbfaef5b6d4a0a5d9ea3029dcb6f68c132bd31f81797449 +size 60862 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86f272ea402e99960510b25646ce4e922e2d0ad2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3edffb7fd03ed75e15fe6102953940053aa82410d4afe14adb274e1a9f90a1f3 +size 40203 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ca11adf9a9dda82b29f872b52fc84dd5b081257 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e13372eea563a0cc909664f24412cc0d8232cd9582a53f3086ef4faba3b32cad +size 23689 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8cd1f48f9e951f9a6d0b0a02b3a06bdee0287e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694013280fec31c0a8ec74603cc0507bb65dd547b93a1e74af5c682dc12f7d1f +size 31019 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0e85aede2dc7928b7b51b16d0aafb57c2ed403a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6a891a944acec2a218dbd623cad901a49a26865ac59536f5ce85063276a68c +size 22905 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9bb2bc77a56003f7f89eb73353c9751ea604b3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bebb16bff3638e479228f4e6c998aea30608219cafd50a674183efb536b6217b +size 60639 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ea61a66183d526a3645f0dbb9e73013764d318a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c241bb8fe7f17dbce30823fec096ab6ff9744acb2a01bafec23040acf480be7d +size 25634 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05b24decf3cecd49138d2b6c9e759d6408974827 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3da89c3275dd1b68c715ee751c07cdc68930164488769794330d957547498cde +size 25677 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27abb2a2f30e0ebc94c4e55a09754901992a493d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1832c4926025d2753510af5149c3d6434f8ce4fe8c966fe884d112f0291e665d +size 46252 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d0530c385026fb12b9ee42c206f7c75ff68adae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f190fa693d7aafe2ad8dafd13b9f8df9fd6fc9dc0b45e8de0567aa4db129c6 +size 31393 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fb5445a3aa34a5786ee02b14245fe6c8391534b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8297d10e86df9c3319d2ab1c8adc66af479884dffc57a63991a44b2c07ed5711 +size 28628 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00ae3bad744899214832a62dc5d133007aa8d4ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c87426218203b06eee34e4217a28abee8e80dbdce94f86922743153667c66a8 +size 74234 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..243defc021067bffbf165b10d43fd69799af3595 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66361398503f76e0c6d3c0085cdf535bf65cfef22e530c343d7d432cb47ad66 +size 30007 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1bb62dc2b75a59528f6594518e38f3f7917614a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893647df19b6127c4f3c5c396d24daedcded724eb165702277a5231c663ef72e +size 19010 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a00348ef2f4d2fb1f5720cc6e3f5b4e87697488f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c928c9526cb5f31b7cf3f66253b6fe5711e3f762b7f27250dce2a6b2685a569 +size 87616 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc36701901942d49fd59fdfeadc9a51b26a246e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531ef7b5d4f8a2a2e51af8675e256c588382e0444cfe8a3160a29af11f991133 +size 49690 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c12ddaf3698de4c6304d025c78a4526b5d0f30cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b4150655658fb42c64f84588db3725e7be367bc849bce6a71b7b3316ee5c5a8 +size 31473 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b57dc28e996121a8edb31f3624929a65a19e8e9b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c78390208f7dee714647ce18e2f2f6fd865b8b84f351f7b4f9e681c85fbd74 +size 145001 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..939ed959586d9252dc3059afe076d43c31259ee9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff69f3f6e2efa073dea02ae861f08c492735bed53deea9424d1840f99e36c7e +size 44394 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38850f23ecce261ac88aa8d26d75890bd7398d66 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842d98883d5f024d73b6a6322eef7ec9fa4fe51d53d7625507ad919f2e780418 +size 54223 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..334ab63acc710491c0c2463731faf53e775f92f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0cceb38cdc1f92a945616d21851727e119885e5abc1046b4b8908ba9cabeb22 +size 92271 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb03c4df70e17003c5f8c13b2f2e94d228c69f6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d8dd9acdb8c3b25683e144e3d833d1a6b949e7d7fc1a993fd53db5b548f085 +size 56955 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf88fd5c42068afbd9124b7a9c27b3c878d3e888 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea2588b8b11f3382cab1a61833f47883af81fa2bbcd78c0345de2b30e8eb9a4 +size 58153 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9122f1cecbfc320bf3f1847d1d02b03902854f8d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6953ae52faf4edd7180fcea3e48c99f19d2413d5cd6f5115437709b6afd5dcc1 +size 42695 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cafd7d4404566f3e1feb5f416542fdbd332877e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd605b993d24fe063e3941289835a9be23f97fced84855a9a98fba592ea7a9d0 +size 143298 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d7587fe243d62b0266551afd7176b5df8bb6bb4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9076ea313185cfbd7ed576b543d42a3b5425b7fb8a285b4f83ea0e722cb30087 +size 70880 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14d609b3b9a15b242df8d9e0629ba91d03638925 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:300fcde5e6153d2a0ad7eceb6ca8aede4685091e8a6d699a098e983facfcc78b +size 161601 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba05100838e7d207ed0fc7069d2c0ef36f29eed3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3cb28389bba7c4e62b8b108a5cc438166b6159de0bd283d96b112295868756b +size 209003 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ae02ad5046a27374edd3bb702e2ed3211282369 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1dea4a20ced71e327130ea20e752a8d8cd74c2fb911be813c74bf3a70d6295 +size 49668 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b77adbe8ae5aacee6f13c4754644b9c39f18f478 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:defa068809ceb75a2cf612728514a9164bba4d4275a7e330c102c2f13485e1fe +size 31362 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a76c3db90fc05a5780a592e7970f15ce93e6090a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30ec5667c6dc722fba598fe0edea414ca0d7c64649c98613a51abb9e342623a2 +size 35891 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e02e6b69b81f688187f7d71d85e364d9e9d52e79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ea307403978fe12e89f1ec95e85c9b2f18c80f6ca8806d5ae3ea3c0edf971d +size 29325 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..199dfd4d9920c44e8a499754a04d29bc8b33759a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a03a6c528b4eb78898661833a508376162cfbd6fa5b41dd71e674929eaedb9f1 +size 40193 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48f2b825d6b94b4250ac9a634fd5abf93538498a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f2ca6ffa3f5e699e9918e7d6bc76d499d979c13e246aefa5505ff403cd3b69 +size 26911 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bcd0d31c747c2efb69800bbc61ae6870f375956 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec81c4dd2c300316c4ab698ad8c5b536de724ba35223fccc40a54e78fa5943a9 +size 21594 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d56bb8a99a49d6d17ab33a163e0e3f76b489e746 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa04162278a7ff5a75a857a5d77bb56f06fd4eacc91ab892efa5a59a98a8bc69 +size 57204 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9d54e93a9571bb04a476462ec6377d00ffa7d83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3abfacafb1d2ac0a299e14d03786f02ac1daa376b8fa587063e7f568f32a6b0 +size 22159 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c282b18fcf9b3ee8bd3d9454f3663a6ea17e232b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28e23e08f3dea683406560adb71929d6c973dc7bda9188514bfe48958f67a86 +size 168766 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5545c03a138d7c87bef3c3f329ffad502d3991cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d7b5fea7c0ae21d85e61c95beb8e65d52cb5f538f6a787338873726227ce20d +size 89813 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60103d4b69ff885fc68a5c15758c783545a3f6d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9cc802104f4e55cf3cade6a865e6efd9a124cf60c22d7bea8bfb043aa058ba +size 153826 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adb4a8cf64473b7517f4896e0c71818e1f70f4a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30aa70052c2901434b02aa38a335fdc60c6f3d06b44e49540ce1e8366e8e0bc3 +size 79059 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af23f1a4a009a3868ecf3a95a305b1cdd8c3497d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f24e898e8b8ce6af8ce6a1c02bf54429597b2823edfbe9ef705f6dd6d89fbf +size 74043 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1a221f2fe81b095ec4f43d11cfd6e22e98ef427 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9ec893807060d1ef0af9f7c29b49d2c34e273e39246ebc0a24553e4a9f8ce4c +size 82291 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6f7cd34e2b40d6c8c78ae9e4363285b6b2bf60e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb322cdb508093627a3fee2ba147c52a9723a1ade6930d94926976f21458550 +size 89614 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73d203ca84a891b0be313dee6e53bfc2e1f8afdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233c95c7d97c185c9623ffc7c1fa8707047f87741d87ac1e948c82146f4fbc84 +size 1002269 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51176af6e22a0ae65916f266ed7de01ef6af9a6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d452b2e994ae6c2ff46a1c1f836b61910ed5f7f80beba68cb87ddb9a23ad76d9 +size 132544 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e41a3f9ace11e320e18b317d3a7c910bebd2bfe1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f72308b8ac90caae2b6404087c7d777fc095503ee3eaf005300dde56f5f311 +size 182235 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b81f4697e346801ab699b2051f670272552df50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5c4424d4ea60efa671be7f775b62f6894607c25e18ec32b05386eaf1819f01 +size 27414 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d37d2213c735dcb877195911dfc487b40de1fd58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4a55bf8a2bd37e600d9288b89a49906901b6c901fb094dee7cdbe331cc06cd +size 119986 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3837d17840bf250abbfe52795f690056aa553b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb8bff7de41a3f9fe5c6e4ce440a0911aa782b0892a123f3365dc7a13bd0eb2 +size 57766 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f5fcc229242679027725d44761e41982ead7c03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb422a54a51b0d70f8b1b65d51ba4bf1846eb970d15797c807b69b15f0a59d1 +size 25334 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ae5e5a3824cf84c7f0585a6170d1710cff12147 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04457a5b0ee01144903ca0e477e68f5a857c1056c32c0e44fc09a5e61c445b33 +size 39264 diff --git a/eval-results/mmlu/0/ckpt_069/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_069/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97280409120708a8b61a7a86fa548d7bf0fc8ddd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45555b56a5d09a1512b27a5f9b0fa377f2047dba12ac7e4760cfbca82f2c5442 +size 32892 diff --git a/eval-results/mmlu/0/ckpt_069/results.json.tar.gz b/eval-results/mmlu/0/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c65b571dca5fc0dd377a31df2f79f65ba65183a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d78ed355c5165b784efbf9db6cb547dd080582d9e0799f9f96a71190924d88 +size 7580 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbb30a86477888d2911b10f201996103b5a92439 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a58c9a369236cad19503d000fc8c749095741cf4cfea4affe9b1f144b7984a95 +size 16972 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4e9262d17b5295b7b1ae991274ce87cf9ebefce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95c3ca425de29b57b4e3a43a6c38e074bf042fb20d88004afe75e76597462c8 +size 29686 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..770156410739ed77c0ee1060fdb56f02584d22a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac23685849faf8dc5a09a8888bfbb834b388ddf6f4055c69cba1a48e982abc56 +size 39684 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d73d18978ab0f8f1e7354ea993f0681f1386885 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bfab77761edc4f038db744ecab22189273906187d45176c1ba8119fc06cb670 +size 26596 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd62f53ee9434c61df306ac69fbb39058ac7ff36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b6f055b98d6d7a870584836ad406d14126f2fa371fffa5a10e0c38b98ebdd5b +size 60956 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94222df71351a0ddc690aabac0625699ab0ee79b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b0e6fd3af41d5a45af3c1882d6b20c4a3f03921362bbd443c28ea3dc87cbc8 +size 40264 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba3d61c26289b2a6f434b1cc015f2612d1daf60c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272d05d638b3046a228f9f9b8232cbb665c9d75df81c9b12d4462fb4403c18a7 +size 23725 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..135d57e7c4b09e66797d69b49a63928a1f59129d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:782210da3745c9f7e3ccf8a64bd9162c772392bef6ee34411b9e40901ab0f735 +size 31003 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d14f04b25b9a5bda91da9fe13a6d3ee999902442 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af79bdc05eadd7f4266fd8a87b2b482ad28573ffec69b4c0d90be3a1a12220af +size 22929 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..574938b8807dc4f7d22509ee667b9a2caf311b30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db6a11ec475976ba45ebc5245a10c4647a81c4670429d45ed8a135b6f274a66 +size 60713 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c3e6b4e41a19c63d6d4078dd3ae87077ce2b40b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba94a4523452d7e18de71d01551a328ee8f6ca653fdeefb8da520d52c964b9a +size 25586 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1264c060bea39c6cb08c96c1e239d61a4c67477a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a3b10415c8680be98c8a5b772731c4e37990d9f8472b01d1d8ab5d416f113a7 +size 25678 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2806320670ecbb77a6ffd8d5f9e17429a804868 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837369fb76d686f5e3a27055870031824b6303ca1f2fc3557a8c8c018ed56dfb +size 46242 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..561b7830e2faaed5c8b08d79f46f01add65cc627 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71f832789ef34b8702fd34283d03108f494bbe3daabb22eb6e702513e01fce9 +size 31344 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aed8253f2b1573f00f096d64c5d0d6e07bb3d9b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c2f193b164c5a1b16865a9c3591502d4548aff7f6c65ead000524cd380193d +size 28637 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..486cbbd4bb4393255d2b1e7afc828349db6902ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b29964aa6e1ceb6b8c0e7941437b82d58bd763f1985c0303197b945785640d +size 74187 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10598a00bbcfb312061766d511fb3d9ca6268dd3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3469bfcd5a55e0e6acb5dceeb2286f2c6dab71710afa3ee880827d69cda55298 +size 29989 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d71f41e832d543cb6dccad39d1e4cb2a52c772a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c9b23755bdd97cbcb1ee3683bdd377c76e40ee402efeae5291a4ca116f1fb4 +size 18991 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3564730f55d507bc5d0cb16598a08a1474027e25 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b53ca3516fa638aca459941e34bd36bd7483a22b5a2c4add7f9bd8de0bab832 +size 87614 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5739eec832acf3886b68bdbe80a52f2bcd5bb5e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cc604d572ec83f615459852dda959ba8e793fdf4df59fb9f9f78c2f3c015609 +size 49739 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad0caa548367096637b158b364d42476b3828619 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5661a406af6ab5ad09573460639386b23b82a9dc49b786b6d70cbe364c20c2d1 +size 31493 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20ea1548b0b66f9f310b94025566a80f4a037023 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9bb55fbd21e1f620a06797b752ad6339f1e9866a69de411e89e230e9ac5f1f +size 144910 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5801937cdb6392509e3553ee96da2a1b3de1dc6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9edb692b613fb2cfa283d224a3fd9725500c468d6ebd010616f971e3197ebe9 +size 44378 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a53c6aebe9b17c5a52e9c46b3e67dcadfeb63cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f287b1b7d9072be33d9efadd733a7c3ffd142a92b1be602e7fa6ec92a7882e08 +size 54242 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3ab04a89472b7f751dba58138c74202140162c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e452ba781768cfafe06617f074ea010b9b76e63b6d54ddb7146018c5f21d45bb +size 92277 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23d9347352374c098d1dafeea0474a05857db517 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50e44d86d01c22c80e0abb14fabe9b9aa135d8d2bb4c12ba2f1abbb496b156a +size 56929 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d5dbc583e137ba7e664ae043b1c7202e0351044 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c424d5db3adef3c3a555a58c301dd59c7b70723f85537b743cf8b184874e417f +size 58198 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59127168231647897d6918aef647b0f81dcfa91a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8dd88d86d3c5107512ae2c7a8d6693709c98629929f5e82fc5dadc93c9d0a8e +size 42731 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..265748f3f636b728b44ca2738c66e411e5ee3fcb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c77649efdca0dd80d21221869542f52ad4c91a2b99848470ed78c976e55eda67 +size 143381 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a30a7ac2df705fa5b13ca0cf355de7d6906bf9f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5595f2053a49fe93817208cc58bd67703e5663f09905382821e20bf88a20375 +size 70900 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..146a6809cc87123090143e0a7aa09f9636b4cc31 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e6fa0e49183ff4f2778a9b60a1cae3a8743874646098943bdae2004cef5b0a +size 161468 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9df5bbddecd3db2fcdb471fe5e9e2c2c13eea38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57b1f3fa61526af65dec855bb3f0c1edf1e1da9cf2851cd257ee5d4c22e7baf4 +size 208866 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cfef6de84b8ad62f5f1f457daa36879547d3fa1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e95b42e92ca203cf8141cb7f5f91f91d07fdeb591b7352df687bde410ed41660 +size 49636 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a2188a3b31cca0cad1bdd6689d584721317175c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955a1f3125da3b970b994a57beb89087f0afb8ff124c429b5066104f1ea58730 +size 31401 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12dec75d94a95fd11d9c91b628533c2c1571cd97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4435fc9654d19f20324803c5bdf85c46f19ab74e2de09c8bf99b10c8b30445f +size 35866 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..102a82defe261c50e5c912201c14a19c955fcf9d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b732e18186c44dd2b81d3e1c93bab0aa01c14de1f5cb4a6ab4c0d8312c5829 +size 29371 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cd1f762b878b6e760a999b1699f084eefab8960 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05350e9b7cba6487124f00ff49d88ad7641e2028305e058341e19c1bbe65d0c +size 40129 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f24b7b97f1df2bcfbe07cf8a4de70a32445322a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b43e0e47151d47bb7bb6f373d6d2233da038bf52814aee103257f91789979f3 +size 26900 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9191d8868ef66d76daa68533c336d97e9002532f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4636d31cb6acb1c4772df214dee7a36e4f29d398a072cfc65f0782a42ac8032 +size 21567 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a59b89c08188ecd43c6281777dfcc8d5d320af4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c26aa0002c619cd960bf07fddda631f73236f80244d96d45ce9bdd76b586eb11 +size 57193 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b91ab6ac7b9cb60b4a78c8fdbe2d617133b4e54a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfed903e8e779240f1eec8af41268cf18f11cc689c057f0882186303c2521ec9 +size 22149 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0713104fa7c2c53542a8625706ba15caf57e16d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcf2470db249df35dc78f8dc46653ab972d17bde1f3b80a479bc2f786852f880 +size 168743 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d580e63a6e53a54dfcf1f96a028755ee2474971 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9abb3a7153893abb58942eca87952d3cf5dbc60362d5e029f736b81c349800f4 +size 89735 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f2e2542c53012cc925567b057b80b51994d4d91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9df40eb7c4c21f048a5c50965027edd3bcc7fd4871d24bd6a1487c6db055a4 +size 153729 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5ad5c6a5bb101ce93fb32a99882ff07b148ea93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b69afbe418ee9e2b7a87fb3a6e31d7486a750004fd1cb3d2dee794caaff0a4cc +size 79091 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32fcc97890b31c13c18a1039ab9d42c3631a76ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb232f5d5dd5cb187dcdbde627068dfa75431af7cc7bd5d9fbc71a5fbcee20f +size 73976 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f03a3b59819123e0d9404ae1b7660aa41c0227cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d4afa8ae6bc37b1c81b1550f26c2207ce0942ed90137f0332d22e0397c3466 +size 82251 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a7d53e104a92bcf85782c655ec7a351631025be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:758005663f54dc0a636ef8462db4bea71f99c9a7852236ff0a646112594b9304 +size 89539 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b032dfbaf0b8e7e766a9ce7f5c402c8c14af0a40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87c1739042c36254d1d72a8e9f5e9df006e4894c0403bc431b1cf94cd0018d11 +size 1001533 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b90c2ff3ddc9d7743bc42cf3780c4d2296412e03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded869f1b624f50d0cde3ce6eaa8fe4892887fbeecd8de5acdb29373517c6f2c +size 132554 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4b00a48f7e5407e3031c645eec12300d14a4159 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c8f85f6788f684b626d8ae4b438e94f43ca77fbe5e06debb0f680639541748 +size 182143 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87897e6f8629eb70e81986266c06939fa41e5d79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26beabe2e540cefa518e86625d192068c2cf0e47ec0ef2d4395727bb0eb06a2 +size 27394 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be1edec13641357269645a948f1ca9ee5f2752a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264567d38c87d8348730299258f5dc0e7e9cb3f659c59c5726edfefb48d8fcb5 +size 119966 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe2a26b06ef0fbd53abfaf7aa1fdab96db264e08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1367f5a32e398fd9e64ab151824b74b0bb011d785929a2f41872b60cd8471e9b +size 57812 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13487800211f39e159920c78d3cd784d83131d59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:877fc41f5fddede9230692da0eaac6b6d656733e0e9349f54c98df8d0ddfb495 +size 25275 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdfd341a3fcb65635f6a1b03125874c598e73d92 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:441c24cec32be9528f9126dd2e09274aecdf1893e520c8f177a8125fb996b6cb +size 39283 diff --git a/eval-results/mmlu/0/ckpt_072/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_072/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af092a37e1eb5dd1ec56cf51ee34f2a6382cb444 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff599cdb30f4fc36462ec30cf0dc7de1cbd062047a11bc36f5a9d1a9da13e210 +size 32856 diff --git a/eval-results/mmlu/0/ckpt_072/results.json.tar.gz b/eval-results/mmlu/0/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27dc5c8542a1b7270b817d0ec4b813becdaf362b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f401c5074b3f898b280e328194d6b9ebcb30152e695254566750f9148d442183 +size 7646 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ae8ebc413020c40f5fa1db859bac546f6986d8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd879d577877c73893c301ffca00b487cbf281d89f91db195bb00e046ff16c3 +size 16999 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0171c85d1f2de706ea7b35d157b0ffb745ab2a33 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57afc86227f9587049ad26c3084e5a0ccfca30227c100de7c64ab0dc972a8bf7 +size 29697 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca86b4ebd0b334ba1315642a5a2664b2cf9ec0dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a42052a5082ebf944e57faa120b54eeb565bee84881e28826e81b23f4fa456f +size 39742 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d83f318d6796e9384ea876bf529fea58afeadccb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1336bf0a7174af2303acac7940cda67e51d425e72358277a64406e5302769454 +size 26673 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25d4b0f3bcbe8cfe9675cb67dcd457dd6c6932dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103bf2e5fbcae9cb3208d6840a6f1ba8e8475b82c10fbabba32f09e399de909a +size 60996 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93cd8331db42ce7774149c3212b29f1dba017151 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:733851dcd0c8a22a33e75787a5942366dd5b8cbccc9b696b05d756631cdb70fd +size 40260 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b82a07ce573d17e8fe83466ceede94d717056e24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d96ce2df825f5249d9374c692822af092680bbdd7d070a6636f6a10297269b0 +size 23694 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f652787b6d964fe651504c0ada8ff32e01431622 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676777a8882e9d37476d9b8e0bba4ea398f1216da82df13b629cc7c51b493f35 +size 31034 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63cd43920d8f5646d3eeedc1e32362b059ea62c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35718b820cf08adad28cabe2069f93614f7ba62862c96f7c4765a0256727896 +size 22872 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be9eec9edd07b48d8759d1652dd9cb3e84b300b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5913146bfd454878838e760e64568a3b2031d5a29c74fcc4e2a7e336ed943b +size 60737 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59fe615267166d2678bace4d01f96c201501a5c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af30e564d330491869ad447a5cf510fed2f7858bbda722d39293a906316d5cbb +size 25609 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c1211cd441e3983eed892a33f580c9d75199595 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae0d80cfe087e7f7e84caef3095e84ca871ae2ddd17e9d1136a76cd1278bac32 +size 25735 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09df575fc34dd457e0ced2c636ef04579575a177 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:159ea0af6c25ff043c996a8101c6f7c87f19368c80964ce5b98920d22f12945b +size 46341 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..481ea9ba07020afceae0857d4d4821bfcd10f075 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068439b3a83057c8ae30528927081d9b9b0dd5cdf6b3f4c8a13dd0f4da33ea73 +size 31368 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05836a4addc29342c8d02f6ab4071b285405293d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0595096ffc6d7731c7c04cfca858146f227c0c2aac07942441dc18652e7ae58 +size 28712 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13940a056f9b1dd226f8023a078083947ff47eaa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f829be34c0c4e5cd0bae8e417b6bf79015f745e2336a8e466883e2956cfba8 +size 74202 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..428f1fdb8c12dd9de55ff455333d958c62ec7bb9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7232796df59e9edde9a2db9938f374740b42b61b2019fc4a9f11f39b909338 +size 30007 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1a438e8936a075656406841d17089424e6c738c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8f42aa0691fd707e292c4d625909d2c4507badf1fe9cf9b388e479a29f1573 +size 19042 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8b4dea59eb92e41495c90a84ae6658a47a5a017 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00f5e82a5fddfbb85cbde420933f9a13f1c944737ca807942620ec37686370e +size 87711 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9d4dfca1a07ee553d9b00162a9824ea43101424 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c391e0cbe6499c55834547936f4c5b0cd9ec751912738f21798535e1c85e731 +size 49776 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c9f097a47f9adde005dcbbd7f45cbcb2eb75e91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:822bc2fae97f9f25896dd7a56a9369dd76bd23d5686a788be28203b5f205c1a6 +size 31483 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a43f227adc5c40710ee9d929ed3e4a27ee407e2f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2a14c29eb6c0307761f68e1ad848b820c5d04a0475e9844cdcf9b2fdb60f360 +size 145094 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b28ef04bf8727be78f6283918e1872c9839f5f70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:963bce6fcf1c85d365f890115c0ab5fb2c84ee55829634d37c2b77d555a020db +size 44464 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45487ca8234bab9283e49cadd36f15dcc83aeecc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781d7101aff77575a2c945f845b1acd99d3e86c4bc2273001371c08fb9230f9b +size 54311 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22587bb1b5ba0e893891980fd794b034eef595cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950ffe268e0e8a8626d6b542048e3830ce6d019dbc9e8d7f1cea42444ea9db00 +size 92402 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..894d851d893d5d473d95132c06b09471e8b5c1e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260b096be06c0b3b5b38fcc799116e1f4644868f2b5055b12daf058f0ba64e83 +size 56894 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4ad3b96ad4c3fed2a9494eaa7a1656ac7fb1f87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ace8311ace64ac6e2528340486583bd6430b639ac870897c7be9b66904f3272 +size 58267 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e340d842c56ebcf41de74cd584978acb18458fd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2428869ad7e55f00b95047c5447be13ecf8ffafb3834ffffad30bec7f25e6613 +size 42724 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..298fbc961db86dd45597b1a6c5216b02a41b9bd9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f0188ec90e77df5c77a3a8b4023e459ca5624af5d9ddc5bb46cecb8b526d5df +size 143654 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bed4804cfa795e4f53d8f245e3a4fa604ad58e73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd0364cb7d90afcb5cb49e76cce69e1dbebf7375e196aa2ed10e50471cad26b2 +size 70945 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40363bf3e69c09b42e25feefa90ca8e69b2ee9a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0bc77e72ac8a4725f70025456e61ff201f97d9211e293f6ab93a558396682aa +size 161619 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3059f1eefc82f4772f743848890e711816087318 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b741d3e95b4c2b1d79528d802f426ebc9aa68fe17c31ba3b09ab86fd63a7938d +size 209102 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d973a3ffd20e082d0ab84fdd2641dc52819f5fe6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a7fbe9314f689cbdc15b1c975f20789a2d40e607e39c8bb27d8f3c8b471c86 +size 49743 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2cd76a7b6e906f88b497195e4e95bfeca7de469 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b440e573c7045e03ccf7e804559b161d1177bb903b0e5f097fafbc99ef660c +size 31442 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9629c3381dbefac85ada3c774239573b8212fae2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b37fdf4414625bcb7063da1ba0706575f28218cdf9cfac5e57276fa4d6dfb989 +size 35962 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee5549ffab99b774ab524e703b91081197c7a36f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64618c346159280ed38e5fa3abb9dffda61c326e5ae3b2d02e66e9d0b2a31eb1 +size 29411 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00b6a1a7f5468a7b0d3fdacf26ff7e0040328199 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1546b53b89520f667433d1b4045f7ecb1d1186f1c7d90eba29e617ebcec8de9c +size 40233 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fdbd2cb318c86135af058ba9f2c322012d854ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a39d0a044166021c695c21adfd6f7777608a4d64cfb4bf2b2ce842c0460fb3f +size 26909 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb404fadbe454f3b42564b65e252110ca7a1b980 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a03bfe9caf295af93513db00a124304899124b739762759ac4e072dbfb30990 +size 21660 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d269e2f11aa2eda7766bfbcc2fd3d33bd8168e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc6f7437763564989460aeb04e64552e911c16fe6788bf2061a4eab37ddd961b +size 57287 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c48e638676746fd95abafe33531034952aca7565 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19de8efa94817e9c51c8ecae690775bed03074157fd79282220d9bb6041c3c6d +size 22252 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23a963a7e6178180335a0867529348fe7ba5d954 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38eaff61e18dfaf3b02f01f6e9aede7811f783596236ad560ad429730095d75a +size 168910 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dfd4468c7c13365fe372acd447c5fefa0d3c3db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1acfd03053d86cd42923b5e0d7be8e8a9ae2d7ac97141ebcc1f88dc78dcd487 +size 89966 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71685747acc2d8f41177115568839621657d1fc6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a16af3ab034e357363aec34f311001447db6398f1e4483ac9623e002abf4526 +size 152891 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34767d9bed0b95b1169384a9a8c5ed0d837cc2f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf96ba8c279b49d2dc27107a40bd031ef0fc69700f67345a76897e42c758243 +size 79174 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb5eee1b0da570cf56342d58bcc365ad8e19d1cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4f3a72221b8c27d23e8b362bc07f55a3a3ed738fd355d0b429de3b8ef7b437 +size 74170 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edf6e0c6f176c847859ef07d15c47f717b7807dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d728e6c3f075827a8b11c2477be41279f7e3d9b2d97aa4a6d2f5bbde53e9b4 +size 82354 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e0cc8b7e8c154784ef5be808a90795816c49124 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e0ab4a8f39ad68b1e2aa1486f3550025840d931df99711b5e06b3aef14dcaf +size 89639 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b4a254ac4f1e845481d39cb33b1c79e1213be63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0c9a6a956dfd8c61ba346c1b76786ef784a054136462e1ed52e2718c942910 +size 1002589 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f06969c22ad9accfe1e05ef555134062ecde2f76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e2c966431bd2bf1616fc5055b34ccf8d5bf32ac11792c3456954ed97d82c07e +size 132620 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0233002cc36c528cf235f0a6c2736882bf85589 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1597b3917fbf843145820314d6a9e53939b5d66a4997e85e2639bda748aae051 +size 182429 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88b7c4d5c3c0fa86fe8b7ee32c52a062da052707 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b732d0b3dc6d230210f8a12e044149d4e8ea720989ffe61f834206f5bbaf375 +size 27471 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9502644295abef100117f71fa56868a6a210e9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a90796cef203cc071b66e9da8a1d5cefbedb56276699462acdc5548a74e3be8 +size 120154 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34f2adf01b5b1aeb21c114a06f806db8d98427c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f9d664b6307a7c1b564089b38a5a98b1007fa69a6e31e02b7b40944b3df98e +size 57881 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bacde7f6fb223d41878f100f604545787d7c12c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32971eed82a40847a3090f6b467229425e637358ac5810b84e8dd405311de5ea +size 25359 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33c346204f871e1ec8fd5f0dc1cb2491a4c42df5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b55916161c825a2bdbf106e563ef66b1f8bcb2579a97640819dd2af85d1e27a +size 39349 diff --git a/eval-results/mmlu/0/ckpt_075/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_075/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4ed1fd8fb60318bd3fa5fce026637bd600d7795 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5aee196f904e7783eaf15a6d218c3b16017f8ddb8faee90c204412dc1b10ecd +size 32879 diff --git a/eval-results/mmlu/0/ckpt_075/results.json.tar.gz b/eval-results/mmlu/0/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c7e9d9629c890795f2039c3e216a3fde7500dcc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d268ef31189cdc73aa21bd35206bf1bea611325c5fab7ca8aa16a46fb65790 +size 7616 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6482718a25fea83190a56b48d7f7b368a47d4b9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb993b657027bc0bd61a99846997ac3dd2568f8dfc5ceb34910287f6c07e94f +size 16990 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37320254b931e16cb67abba291416fd6c7d17a46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef4a4ce350416180cf38bec5fd37251a80dd931696e8678f42211ce52f6310f +size 29739 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a191da221f5abe48e8bfde1683355ff75024f569 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f010819c121c4b9654344fc19134a792d6dbb654ba8acfb6a524538ae42eec46 +size 39780 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4167b79dee8f016a288228a4eeda5d4d90a55aca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4de3effa2c57331b7a535fae1047d4d7f308b8fb16ffea872f0519e532c42cf +size 26642 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67047d69a40d5e3d73c1ff9a49bc4bc1d6138edf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac7414874713ebe6c3b7535faf0f68421cb5f71d7120e9359a42710e0ce69597 +size 61073 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbec6314c8bdde784680511d2b0fa7109d50880f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de9176c93c7b06e13e8f5f560f7653642573af093c71b06a00f3d4ccfaba95b +size 40302 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2446cf07af9a2786f9a3551fb507f6f48d969050 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b13286287ef2bb4b3b58a54514b09d4bb65a102cb2aec850d34470de11a160fc +size 23724 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8d8f3e5c6fde52c2ca556d719b7dc0988da170d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3275694830ac86764d695944aafdaadddff1fa269c8d1bbc2b1446c1455cea3a +size 31034 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e5c5667818801b7adb1587856609478f5e0cdfa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3be9f963f08263109660290afb8b43341bc993a78137fe6ae04d882e02bba8 +size 22855 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..364aaf8a1527cf0cd3344ef5047027b3445e2cf1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb89306a8f2d762fef8af33155817a3d9b2244a9710fba92904dd4a87275464 +size 60760 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8cc8bf04f37591a28b4dff3525f6eed698e11c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd28087e258e634d156e7d47173fb59f87beedd5ea4ab39d3bfe081e93367dc1 +size 25652 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec2606d73596710c821f6dc32c6f45b33c6f39cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f9917bf5caf1053f34180037797d245cc071271544216a6908157907f84639 +size 25739 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..832cf18a7c5522f56a9e60e28990935bf0513add --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5581ee2fa588509b96cc3fa719bdd2aa07f6f0b6386872a050875a6e5ac8326 +size 46365 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ac13a3a911f944ad50e2d760938594d0a3656ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0988fb0a7309152b2f94c9de913607bbad113fc1295468763696257498cf301f +size 31430 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1811cf0dad3dd16f978779ce80fe560ea96826aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628b7b62177de013b13e157f5a427218e2f8e708c7f24f791d2ccf2f1808b7ca +size 28708 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d9fe989be0c947014b027eb8b85fb1b94fccc86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22099527f1ee28dc9a8d9188d4072232fb56591da0bdbd5e940dc0c2b3b0f69e +size 74259 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba5e77548e250c4328093fa7734b8a4a7943ad3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7882d968ae68d089fc300a3408a5b42b5f7f4dc31a58fcc38aa8cfb06e9ef6 +size 30030 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8000799522de15d42959a9fd96c49f507674284a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ec57321fb324ab40901845cd3dec4b307ecae9d6b116c0ef1271d71429fa41f +size 19071 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19c673188c9b70ce42909bc85337233cd72302d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17f207795972fbf66ab27b9daca89bf53d29b3466005535d72be74a3110f581a +size 87774 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc0adf6f610e205d75c9c006527124c6c774e90b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b431a38db6a6626901fba32c52f322f64e272ef0b90e715b6d46df271f1e0d5 +size 49837 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ed73695f8aa688fe7231e1db9ce7d9978f90294 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a2c8db818e8b300b10bfdd8614c451a07407a8b12a7cec491f5f1658366e48a +size 31516 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..448a0e78a45bd960d21a32c789a11bf5a33f700d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c85f4c5664bcb3afdab09a39bd962cfb3f332ffcb82aa85171be1814f579b2b +size 145243 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1075a90f58f3a3a41d4794ee52b54c66a52e564 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5fd64737d1886cdbb7be4cdb6c501d18ec45803aa6f042273dc2e02da45c569 +size 44513 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a2d136e7e92a26939233db2d4c9f1244ecc6556 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e412ba854883afbd46dcad8e53cf179eb7bb4eefc181d9225bb223c15c9eaa7 +size 54320 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..346359471a7e349eebd0aac4f2d27ebefc48c339 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d34d420857cf5d7f6eefdb5f441c4b17e552e59d6c2d1b8089f4ea5d7c8160d8 +size 92438 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5464783de67ff3d891f4654a418377a187a3b73a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d67a98c568827d49b856a3f5dcfb26a5f29d7412ac25a11be205cc7c58284084 +size 56972 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17b3a502ebab76a0c4e477ad9f202b9a952f74e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b87887564c57e75c4f3ada556565cc0bed54e82b6eea24f08b061b61b3bbc48e +size 58267 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b624c44754fa9d4a8a2582472effb8c685871a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c9a5e35722228bba37cfe0034d284609506ec15ca0601565053f87b15f4114 +size 42714 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e9518856467987132f353a09d53082570cbae9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9611655d1bb7b7583b44e73e6227efbeb9334481c0ac7c24ed54c2e2b95ab7c7 +size 143691 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd7502ff09777d020c1d8edb1d04811bc911b530 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899df6ecdf0f54594913c0d48cf55b997ba4331255947604b6535d5438703236 +size 70969 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2bd3cc84f7efd0c2c3ca561fc5d79593ef9e25f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f06bd2026c7ad0ddd45d75e74e3116b1c6b44fe2f36bec02df457fac86a57e63 +size 161732 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e556d4f428b529e70a335af26912954a119cf4df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea24945556304d589a5ecf26cd78504bfe835e367a1d7b453925a53ef048a43d +size 209255 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..202cb91d96d110210d67a8b528fc0cfe957f82ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067ac57ad10d7c45f2d1e0e7ec453c34fa32a81c6f121419ee495ee2d89671df +size 49764 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06ecb0f754ce8f24a9e4a806ce0328b8b2bd74ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3372b905b201452285cb371e3ff5499fd0b88449710ad1c10ee27a14cae3e14a +size 31486 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c69b83bbaa2551f87552aefd18f854a0c11dc97f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b40f0f2ae21a93c93acb85761bf79b273a52369326ba2411e37a0e2e389307b +size 36000 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..699ec24e6bf70913c970852271c1685124d71ebf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55bfcc9cfe898bbfabe26d2d6b89a26085ba4043c126b9acbffe52c5f3e7ca90 +size 29401 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e891f97aa2b0a6ed24b868b8198b210cdea7b57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642863542da8513cb4dd809d7f05166d5ee8371fde1586f3d24fc6109f34ce0c +size 40234 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b213f08d35a4887e8520d77613f82a985f3a155 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5e3affc321a1dc3f086169a5c9807f0523bf875c28e38fc45154df90d3ddb4e +size 26956 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e0a4a1cea908d6766f816300531ea32e7eea47b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee58abd4072959f155310b439636550cce3a2917012b8d0f76bde635de0ecaaf +size 21666 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e224593706b46e6001602a1a256a69a8215163a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19b3f4f4470a91e3233b72e604c81a7ca3e5c21b9edb412dc53663ad8dada430 +size 57401 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b0fb3f20069d6e190a47187366cd9a10a0c5d40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c282a68ca1271640314ea5077bd7f26ed2afec27767909b79cd0666b3bcccf5f +size 22240 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f49a2a45dbf6e93dcf3928a8d2a9ab441bbd68d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55e619526a2b5a8eac275ef618bd22c4f045f2fc1dfcfb3a3c489edfeb7c68d +size 169167 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57b969565b532fd76844a564ded8975a4fea7207 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04a23e2099c8dd33490328c805eaa8ec2a02005f732900609268f36eca4715d +size 90042 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5f4f8e8e82b6eeb81761557a8eef1452a416b2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9d57c69f1ee59a5770ff0ee40979e91b25aabbe9f1fb8ae3bdfaf169ccbdaf +size 153652 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb0eca6fb28cfca26f79877c2b6f6b40627f6b2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68022eb1d05246163b387a036125c13f57351c52a9de7e37838cfa3ae377d726 +size 79223 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08bee9b8ce57d2ad0ef760db1baffdd61f728159 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41b6c2ce28b71f39b24f3543e83138150bed9e5e631e98c40acd72140d5b03f +size 74153 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4eaa859a7286fa03a17ff30bdeff9842ac6d594d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbbc43a6410dfaa44d8388187a05136732e3a2a17f9fd3d6c78ae87a66caef14 +size 82382 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbd7e7141b8092aa3fa8cb9aed985994e9377565 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16361ca59e2867c6f95750c2210fea16ee4a8135a19866666506969cc4ae4dc +size 89732 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c952451ce27cadd50eaf961b660067a43007bc86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c78c9c12d5c1eb67266b4ce9ea1815dc019d6a2e2d8d7cf89c70bc73cef98a4c +size 1002350 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70bb47b7e026d07f59a843f552aa11528822fa2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad9e57d1379223e6258a665dc368b18476d14331fbfa3f64aa4cf6920fffef3e +size 132623 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2ebae8cffeebf9720eecfe135cc2ef10cef90cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0e5953cfdbca4e479de64b76cc007d8e19d52eda357a3099eee8fbf9bf47aff +size 182520 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a4cf3db197a893de2b3eccb76a5831d4a089486 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:846f857c781138b9d47f85682d5ce5144486663715c7817aa9dfb908e13fa61f +size 27477 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c580eadb900b8f437ed6297f9abc1c3a606fb32 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:969159819373af2ac743fc1b8c230648e7fc29346396d627615292093638adfb +size 120195 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7cbf1de78a67dcc79124dbcf1dc0bbc6c71d191c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb187cea0153622733536222c3d54b548f7e655d0e1505e2fb489c67d9a40f94 +size 57972 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e32f417f2a9c0ace5d06ca33dbca18ec4613b6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d2186f3d26c5b6c06188a79239c3fe77b31b0cb83364442cd4a22e402ccb65 +size 25430 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9323f657c63b18b9a129dbae0bf8eb744304c646 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8850817ac3238c57692f12f34b26f048696ae304d3cc990ccda8c055b4e93ebf +size 39353 diff --git a/eval-results/mmlu/0/ckpt_078/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_078/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3a0cdc86db81bed7a5b0f0ad1d2c7a21fa8d6f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec55a06820eb799c46a5e8437c02c3fdce3251932a42b394a0e3635e4c6d1bfb +size 32953 diff --git a/eval-results/mmlu/0/ckpt_078/results.json.tar.gz b/eval-results/mmlu/0/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c44edeee9921459d54a4f2bc83d9ff20a367ffa4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe41456cd43e5ef49972cd4621878bd80c65bc8827875f4c62a79a4c6968cf2 +size 7587 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5ffa15a76d739d53566334dbdb30f8fcf39556d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bcb48275b39b5a1877ad00c81347c13e74e51c8b730d64bf5f0791f272b87a +size 17014 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a2ae45e034066d9348c529a1a7aab81f54a5f6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7dfda6f92833492ff582b24b90d7397ffc95d9ca3cac384b48816d6bd48f49e +size 29768 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03aa274128e6cd88409d50a810118f06fe092bae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78afaaf4bb62e67a9993a8512bd193c51b8605e24ed813e0fb25c92f871f59a8 +size 39772 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e22241c9f99c2f6b905df384ba19b1bacd49fe33 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:219c86ded4a4a36f2c78773268489644fea6ae05fca94bf3b2b0eeaf62e05c45 +size 26641 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a249bcfa2e20b876029f958677cba77887a8fe04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6632c450d49b5e9c0188967885ad608f6b1195fdbd0356474f2dc9706a6f1302 +size 61086 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f210c51a58815645a4d11e86b6caff77ec2ea11c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0541d50ce3f0808ea2614d2ecb3d4033a13873a6d381936f287af8268b5fb6c2 +size 40304 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6f202c6bee4b475db692db20f81d8b80cb6d993 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e21646fcaf87b92055b1b78c7495d434401c0e23641f96069f340eb5f035fe0 +size 23712 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c83e872ba3d39cc06fccac1fc4534511628a8205 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bdc1f08c4aed7a167ab32b4a54cec11f7316cd0e1427661f3f9a6114b344fe8 +size 31023 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94b600c28dd92940c603da94ab5178e53392badc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b32523acb9985710057cfe31a75ffd48665b5117fee91a8a5facb075c8644445 +size 22923 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8080042d17bac668e3414a030ceaa56de3c02bb0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef49b2f88a028a73c8707a7fba827d1d95b8e7c871be2bf87b5056aaa210a19 +size 60778 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f510641f2c6922b369712407a94afeb2c7c4cc88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6f94a779a2632576c3a57bb8a33e59b413c53a6f4dfd9d0f9c793b34d8660a +size 25658 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a492b6c626f55e66595f577592a9a7526f6a0bd9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c895510098e67fa1a553f73f6970d41462e880fe86c7feb9d25c54173315cde0 +size 25724 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cf59c9c1f443ce7b3b8ff0e4a8f169c7fc5edc5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7436a19f59c44f245d2c68f0eafbbca70d2270fadf3a50d0efa4d7f336db3c60 +size 46416 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74549b197b91279b21c850fa7942d2b3d8e578e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f580ed59393ecf6c7b16c13d0b377d0769bf555be9eedb24380765a5a6109966 +size 31415 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c551c144925aedc29a2806d5b1ac9d4f1eb0595c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fbbadb1294c51ec279c17aa3747179e5016a34534a8be460d901fbf1cf8f3de +size 28718 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1db2310e7fe273e98162a6e841fef9dc458b58c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b41b52ff877c0387d072bdf3d3c69e3420a06fb4fa1b165620b1b5a47ad22115 +size 74291 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f65daddb81a5bfd47ab4f19b79533584274ea53f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89552f8d008273077d16a313a69828d797a5869757422255329d7915fd76b24d +size 30036 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eae645f310bbd381b0940f609db69954c6caf7fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4f080fcdbf4d89f657c62b71a59f2285bfb25a84c0a9a832eecfc2c6f4b998 +size 19067 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed838647547b5cb5f8eaaf9f38efee46a66d050c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0edc3abc1f336b9d6603b48f58da8617012557af652d57266c2e18747abea9 +size 87789 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..770525285c91ec73700d212bec25a24b28390d3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80899628bb66958916dae5c7f7e974537b02d5d94e4e03ecd6be407981c2a7df +size 49807 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..555e1fbf8da9d224a08e8f10a158000255abcfdb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c20e3e7f961c92a04dcc8b9a49c5330d43c0d6afa886af911ae060288b8ee4e8 +size 31524 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..224bb4a8feaa7847e0aec3808e36965ce64c14d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5909996b15957bf122f9d1ae9eadfa590631dd1e604104bee23bdd51d89d1ca +size 144964 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b24aa9dc01de5f9ebd6463f6beafcfff19f2741 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:641abb602737ed76262ee08f0d4eeb09b0699b84716e4b086742de17d3370b09 +size 44425 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bfa9fd81674576d625ffaa7e8d1a5453dd0bd80 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f9c95387183c61952db5c48436e670eb411aad0f689a8e81974bc041a720f0 +size 54330 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16b8b0d4c147232ddfc2864c9b26d66450e39691 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea37564a180b56d8258072a9db55e101b959e8db5803dadf87520d687f1a187 +size 92481 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc83edb10e6c03a903cb8048b13118fe01f407aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f121ef2fe7a40b2ce25cf8045ad9342420a389e1be8246123b836877cb4f5c +size 56932 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7136c58f3876120044e5b148bb4ebf82def5367 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61d02a5e4aa07366d196b7fb65fe9f00e4616130bcb0236761e21106dcf32fc +size 58313 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55bfb7cd20712bfb12a676467a9e5799073513fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64919c270d453a2bfbcc30edd6db1a148f40e13d08bd3b1f0315fdda8ff78ae +size 42784 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5881c7bc5d878180b805a9eddbb9845bf2103c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c3e9b12cf320bc5ee51c082be9889ea3e266cc12850a584d790475bc5cdba7 +size 143585 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8be1457ad054c5e1d4046ac706e4a9d31ad0773b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5ee62a5c16bb601016f82b1d88747aa3643c40f6bf3b93f9a151b6782882ca +size 71039 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2603104d5ad796cd87897ff199e6b45ad7ff73af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fd8bb61bb423694827dba612e2f883b0ef21826420144f418e20e34cc7ef39 +size 161630 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f19941401ef6e30294ee2d5612480f50d08c2f2f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6522170d2b6af1c239840d5e583ff2a7820054e1a3215830b5bd4ea7f5c65e46 +size 208995 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a556bb55b1811e94fee87e3e211dd26d5669e20e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b523d99320659c200e8d2c829b9b5bf85426788774b4ace24b58050a67df41c +size 49736 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c362a13ee69cfd3c5d975a5c3b70c237e23bd8ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c7aa73321e6850e0383df431ace91ef3baeb50cdacb85055cc3f411bed001c +size 31435 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4508cdf2532d496629256b7ee1d5734830c88251 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9af1b8455743ea552275612a310846325015405da2465741e90205a77e27581 +size 35927 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9bab57c2bc576efba8f3d0c6b08ef7ea2a5608e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09c9b9dea369d4e20584459944fd29c9331978b5c8546df841145e2230efb7e +size 29406 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23d4d75ed80f3f028ebce24358e03346eb82277e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc99d16a4ff693d571a8d519c762f22f20ad994bbafc2585d5022e914438dc2a +size 40252 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a2fcfc33143333fdde297f3a5d87f6af53107a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c1f1cb8f16d8ab4198abfdbd71630d5ec4c731b02e25314f1cb2ea718c8f75b +size 26949 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2c3b2b681b43f3b9daddb3a0c8cef57e2cdc49d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad6bcf6298972febeaebf6c239d7273fa9a28cd8c926ca6ca377fc2d1d11889 +size 21637 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7a5be848c68ec48d38a062ca85b46915a4d4340 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b0e881304449a9423a87484a7ff846c47c0c93af49127df8a3d80ae459bc71d +size 57344 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d02aa64a6748fd810224c05154e8da778c766319 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdf133876a8e1c2066799b3322d7a8c49bb9ae2c427c9e4eaabca0257dde0a92 +size 22241 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..938d9e633a6e0927d6e6066a170633c6cbfcb676 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a4b7730f4cfafadf1a0b11babe52d9348a3ebca50839b84f639f91ff04e861 +size 168991 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af4e79a8228efd097377bf852225b0600e94d93f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac083e701ad33b159ceac4bb1f7fdaef2ccc90c23fcc2b9ca3290993e9d14f7 +size 89851 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23583f235543b3e9153f7d6715ad1e44ff0c38d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:533016c6d5b1c6bd2efaa7dda1b67012a89c146bfa4f3d6cb435834e51d9efda +size 154240 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c326445f04f10ea3c915888cd68e27249054dab3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4bdc0ab19d493db6ff0a82db8765a9bcc6008cdc0313a5fed2e7c5c2724ccf +size 79260 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a30cbd694b09c4532865ac29da2458a34405cd86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9386b11dc44f179243f18807dda214578830468f74a4d4d516543d2a326c65 +size 74193 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c16a5916a0c4dfe588f03bd4d0940253509297f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:184f97e2f09692e409e69f1d4da839ab9a462aa535c0170ca67a83a7f43844f4 +size 82379 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3660cae013e93f3a2ce4db631fd6d64c8a10966e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdfa033327426172fbbf12fb2ffe02952468d7be5d34c4f7a03921835885b61f +size 89749 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fa95aed2bf0a7b7697df8ccf87e3ff47165b8ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07f0cc0f65f89e2855b851d5b70e6dcbfcb2f032d15957e6bfadc907eabc856 +size 1002634 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0faa149af18839ebcd255486a904ea73c516568 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e334db7f71fc485e0160c3ca3e96334d9f0f4996ce1c0ec67802c5eba7bf9f +size 132776 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b845a01f82b4fb2a388c3ea2dcf3b8e1e32df1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23384bf2187c8f183a3c242d952d20a250e56c6250b24d6b618c57e2dd527342 +size 182525 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9063fc2ff00fac255d7c5f358806a2f356ee761 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da265822ee5d4e568d67f2eac629ed5140e987c04635ca5bc4ad6dd76786923 +size 27459 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf597bb9389f3a6bccec2c704dc42305fc9c060f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63340e7650129aee55fdaa3f723d31bdb1d0b753f87fed2c920f15f033d31200 +size 120137 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64c5c1fdbf8225269501010fe81df33cf543bced --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b32d38c403118fd12b3319a30d8ad70e928bc78842f56784e6a7eaaad112e9c +size 57884 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f10b58158c18c60ecfc2130a7250b90319165f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b775a027a269f3cad35a8694f3d4080f0180b7115781a55311c2556647da4270 +size 25402 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b49542695d2355babbf5f37be7caedf977cf6cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0eb511b3bacfcb25cea30169124f96e812a7f1878805896b4c5d1708bb967a7 +size 39413 diff --git a/eval-results/mmlu/0/ckpt_081/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_081/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfbcb700ddc2ccd7a61490e8c9ce66b8ae701fa0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e424ee59dca83c7bd69e42231462d85063b2a582ca0b25c0b913d3973c3cbc8 +size 32917 diff --git a/eval-results/mmlu/0/ckpt_081/results.json.tar.gz b/eval-results/mmlu/0/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79cc9f4f5d0079ec4070688556c679bc651f6e54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d105c6e062e9d9fb778b09b6847de84c2bfefffde7952679d61b2f5b6956eb1 +size 7616 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5f8ab45063154fca0732fee59219e1f740c081f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c068d87a5dbfda337256f722e9c8e44094d0f7b3a5a74b93606c124415058407 +size 17007 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7e72889e81d39c6030197069dbef7f998173bf3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd8e978bfd723bab403ec8667821b3e76e058af3c0ae0795a4983518500624b9 +size 29763 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aebe108ebc380f7ba0f2633f5398fc430d1de52b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ddbb48fa9848187b3074aeff19761ac8a32950f03ef4a3aa7eaa1c1d866841 +size 39785 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..111f8a29d2d922deaefe0874357082fb2a4c2f2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54dde70c6e3c848973315b44cd206948de1dc9ddee43e99b0827c6bea0d5aa5 +size 26642 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97180a131428bdcbf628dd039e67b07c0f5afa19 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f55a9c6f940cde5329686842918b53c08953117f84ae4c61fbf52f61039de32 +size 61065 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a88b022559038780f37d056de533dbf7f721c25 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f582df5df612e991c6b95c9c4ecb90372a104b73691ff9dda528fac8f82ff236 +size 40301 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43405af5193faa08744525c15dd2b9dcb20e6ad1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7f4cd14c9fad94f8271fe2f88d6adc1b544efbd8a7735aa62f236feeb9473d +size 23711 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c02d8106cff731b05d2093988660aa82f8e1f79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:568cb0770a61bccc739b9f303c8e6807c66c4c4238ab4d5a0a0c56637838e421 +size 31036 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e8e315d329f8703ea2011aabda636f90dab741c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b13cbc3e75af521b0a04b7f0e6d14f6d11f4099d03e5dccbd6505206711e075 +size 22895 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2042e546222ac082c3b0c580ca339a796c031bef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc12eefd23f1e0884b757468ad9ad901c0fbc3e2e6350532717cd75e67a59c7 +size 60795 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52c75645c73f355c0519da42ca92b8ed0fb19dcc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1442ca146627fc3d8b238aad616daf78c0bc5b1843ed88febc33c22c32d7a86 +size 25606 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0243f7932e597d0db2cd237d9455b1fd9bc2717 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8aef3e674048766265e8dc9a017620a5c7ac9e30f87fb2c0cf3e311bb939293 +size 25698 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..982f278c34e4424b5d52c9b801a8f487423e2c50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ad6f90ba7e9424a0b049ef8b0d00191cbe529b5a15aba142f80150a147cbd5 +size 46347 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a88b502c5ba12aa583fb0fae0b75d3b8457602e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e033e4945025769827e90436bef48fe1fadf2328dc38768e157d10f2ae0bfb47 +size 31411 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c0ccb1d161ad7bd371f4b876149883b3bc46657 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7756e007b4de3d07b0619d978c818bf9ed05a9214b3fc1e665b4c1556fdec7f +size 28700 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c12fc228ec5ff6fc00e93fa1a7cc026c2b426d16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48537e2fdd34f941ced50ea4d7f86c3421d829411a9487d58884857b0173c795 +size 74231 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c40284b94beb08c3bb86b4f8d2e4091a19bc7dbf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a19e8bd9f4972e1f00bf70ae0301f90124b3401146c6a37309c3a82b6ba0446 +size 29991 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d56b5290d5b79c5aec1c175a7b928debbd316fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd07b1b2c03599b3d5422bb80582b05a8a744713886b1e552eb65911e95cd38 +size 19093 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78212909cc195ac57784d868c597a218fe8a7e63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32418e9adac67abf1a5b0482a50da26ad6a37e0ae0f2d086b1e6664a3171504a +size 87782 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d7fe160f6e926b4c3625daf6accd78ec2f71d5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c361623490c4fd8450379d019e8a890446f6d2441aaa4eb9403908787880de +size 49844 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91b8718a6377d6828ac6e5b630ce0815887a99a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac2433af2238bac9e6690fc9b98066542d596804f22dff30c53cd4276470c0a +size 31483 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..357df9fa09a800e41b28cd6a4451f0ae4b75dfa3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467323bb8dba8d6de0898c66e11ecc0435db215d8d4aa903b22cb962ded6e11e +size 145028 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9718a12d6e59a1915cbbd7a5ea87db4ba171a818 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ba8085019a7ea1ac434decaa2161b06df63e1eb32a7cadbce7d18f75c12eb9b +size 44437 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..611f58001e44071623dc51755dd36e67a30a492c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5179cbffb986c6e24416619149bceb5851b35cb9f83d89b4f07d47da2beee0ac +size 54334 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e31b40154385fb148fad4f9f75063f1165666657 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b51e5b8c7a2d6af95d3d6c741c232e30c136579d90d055826705a48a89651af +size 92440 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69673f230cde124c9ac507586b0b4693c034d1db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c766af4d06a4c0922ad0b244af49b96ebe4a83d32c5cd5c6d7dc193afa83688d +size 56920 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d792b62e477c803fd1d3d1304ce95f4b946b748f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c252807f544d2d932e2f35955441f9d9b6135809d8a20b8d4dcdaf25e59fb55 +size 58218 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f238f1ddf640bb2fdf2dcb668574bb2c9513f3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca6092f7e75cafc2a2142e4605687ea5078d6a2b3c8105d28ff0e021a1224b79 +size 42768 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5c7330c05c3bc55900573ac8b20ae3ab11e246b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1542ae544925d5ed703cafee8bceca399cd434edc75876902bf251da952b00ec +size 143585 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..def9eab5972cb00274e429a7b6a259a257620f6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a5e791f64700221e412ec585ec07d91142a6f18448827013a807f8dee046f3 +size 70926 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa617293b9616b3f8e726666ed212b8d23bbd21e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad4247762dae6bf76e2efbe023c53efe03532bcd72b302a91b866e4ad134656 +size 161599 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28ea1d89602260e4153cf1bd14ac375bf6141ca3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f1dcaf161bf236f47d4de19e25f490460e049e0fea95ff25de5450a5f1a0399 +size 209016 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e1e6b09978efc71434e95fd9551240d61304f08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a936f0fcc8c6f853a05531e98d66fe99397c2e65cdfceed694b5c9dc6d574a +size 49710 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a75ad9e9905c8523af83595a8e81400bafe9339a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:142a88c0d6c90b2785a72c274485bc3fa0f8c1411f6996c76fe082e0b5eb13d9 +size 31440 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46688ed5fc0cab946e0d051f6daf670f48e870b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17bacd77f3b72ff0263ad76cfb1c237a2844eefd6ceafb3fe026d6caec7ccf59 +size 35923 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10c7d765d326c8572afc5a58316505f549c50671 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:143ab04166ed87d9fd0f0c95296ae11ffa907f6c5542b085813524a1c9ce664e +size 29392 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a77f4a72cef1631d1353f776f6a43a4c0907e11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffad371ee73f713d7540486e617a108841fc4b0dde53a1924e8d51594a75d83f +size 40201 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..492dce4dc386919e0bdb03e9855e992f401a886d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e280bd4800ff5e0ff9e479e274f07c03996bea77cfcf23b392ec9028e8536346 +size 26940 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54bd357cd0ae2b84d011ecd57150ae9dfcacdea0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5029826a223cf9ce54d32ef5ffc4e19ecc92c4d1d39bcf7415088a2bca307dfe +size 21658 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd73106507959ef230208a6d0be8ff44d3eb80d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f1e108bd51a92e13d1970d86bc9ba6a4bb03a1bc47f2a319b9d735c2b61511 +size 57282 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d7ce65425a8118fe97030470b48f82a8674dd4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b72fee0e72df86caf26fe5b29f8a1e51599364f95294f20f58e60a0587072db +size 22210 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9d1307f9f2c21546e4265a5c95fb388014387e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:213251eb47c5217ca4f162890ac1ee1789eb35194e3e8a0df3ee8988eadeb7b7 +size 169088 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eaf0c50f4b5f03ab3663c2b77b49f1af54db89c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a936c31442bb12d2be55e5653209f5840f2d6cbc3088bfe4be677b13df7f7c1 +size 89956 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fac25cfa88d921bbf04b487afc4eba68d89438f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f7b05e712591193a1803710c183ac24e0433bdb01ab8b5fcc121979eacb030 +size 153492 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f172c9e342ac52c38d38fb2a64ce0aec34b8bf14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad7cc49a3888c97dfabed1e172ab7db1fedd70d286e1754f62f77f68695f528 +size 79192 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bae69495565674e248dc4c71806d09b985295ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734d00fd015c3c6d9f3772de4581fd24af9151f47c99e165059d8ec1dbbe4304 +size 74120 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f8651d4134a62d525373e33fcb3d0777703cc7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a0834e2188f4afd33c477953a8db75223bde9922ce854675de57ea6cd4693b +size 82365 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ea3ebcbdd6f6084974cc3494392b45774a436db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c909d4ecee407a32010f3040bc3fb8cf71ba05fd747e3c109ef73429ee8e9d +size 89666 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4953977642a3693310ac750bbb315786f9c4ac1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab9516b25249e73b5aee53b64c2df75e1d04ed1a444cfae18eb774b02fb78cb +size 1002068 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0893142685527edd41bc30254117c9704f172bde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b11675cfe385161351b58bb03cb8622132b87101fc5b8dd578ea65d9a8b6eb07 +size 132655 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3988975bb031c4509b29e566ae3c0703b3a85e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f78f89e76e72050059deb5043fbe569189a91c55da0ca25a9b32e487b68c4e8 +size 182463 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c02e5f67ee135fe9e8875e2fbe381b522205a34 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8459f0723742cb385367af2adf651af777031cb80f4561ca6635c40b066046e +size 27449 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e21498899a96ee6f25770d38f8f2da2547aecda2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93ee2b2cf032ab028adc95b68f7da8ab32886b87c8a19ed73995ba4e040a5b08 +size 120018 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10f9f1690e2d481f0e258b2776dd137aafbf27ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa0980fbadccaa2ed41224dfc6e47d1c52ed2c8683c643880a2d69f547520e4 +size 57900 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc43e13f1721ed0e3321eeec6f48c95ecd4a65f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab54eb891dc3232c752fd0fad3f400f2a9d71d38171d47b40d1d7d1111d0e3b +size 25358 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a319002d1499c0a4bbf69ab4c2a041eb54e06285 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:006b1b301e3453a737a526bc7bddc1f341809ee8ef1017987c31b7beb3089e59 +size 39365 diff --git a/eval-results/mmlu/0/ckpt_084/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_084/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22629e0da6b86f4cafbd5e5a2852b38fb0cdd87c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9804c93531977883ee14ec16d7b2885bc982a5283085480bbbd00de0202c0fa7 +size 32944 diff --git a/eval-results/mmlu/0/ckpt_084/results.json.tar.gz b/eval-results/mmlu/0/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b9b1f325db35f3234b7628b1d470384c96f90a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ed2eaeff99435969b397153f3ee573719a5e0b0990c5337b8a5b718cb41c4a +size 7632 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a25caae88c7d6ed564f0816e980a7c16e3c44bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27b36cc0c37b8a1682d19dca34cb58be071835c2d95c60bea86652e17b4f8e8a +size 17011 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1676706e9e8265112d447c6c64874c7bbc28fe2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f350e404f253af2db25100089afdd1e8eef17cc37ec3339c76d267c7f2bb355e +size 29747 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfdd5b2d839a2fa51bcf52280d0578115fc3dcec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9ed81f9ed77d217c38f12459c3435cbba2e83475a8ce1f1c731c2bc4b7de506 +size 39745 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77b259914e288539b7005a4689a89b767d5d37d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:299c9ef75889d73119fd456cfdc713678f5a9bebeba642f07a6368ca30accf17 +size 26695 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8d4274f4a025e67a882d8694f307b8e67a0c6d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05012125dfcb395f26c54d3c3baaf73230e181004978a02d8eec4f604a6f0d7b +size 61076 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fa012273704c6138de044dab027b788d02de354 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6efdd7d6a2f9d2245897f7df6506e10d70d0c9534f3dc06558f67699d7bb892 +size 40299 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d084eba3221c9aa2f79e40b613ff1ff4cf577c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1f4ffe2165e5f4426bfd87bc9e6d756e26bae99cd119ba81a2ac16a1087ca9 +size 23728 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af83f653d0b777fbe81b68b65d968ade5e6f9b00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fef1a3261f0a4fad46274a45f7085a7040e8ac6f4dbe2a9a113aeb2252951da +size 31060 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e36f5f7660ee6a3874e0fb9d1f7c463e5d893287 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5b48cc71e98d2dd5799427315d763b2935081d5627ed068c5d2f5dc92863b9 +size 22913 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e3f0f5a1c3a01b3ae607676fd1f1079a0b3aeaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8299c7d4447ac848c59b1b7e644207eadffc909bee571b047b550b11cb5a123d +size 60781 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78eec94ed38bb050c72d64f5378b26704c03ba1e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03907c8f716feb2278674e72390e5d1295d81a90a003d411509048dd96d4fb10 +size 25610 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80c80e232abd787b999f3b44741f9cd18d1a38f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3dd9429498f1011ba6b9105f916e7daedf59c1d0d2379583fad7a52b2298419 +size 25740 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ade07e7e9b5bc4bb8b7faad8311e97992d5b2a20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85cf7b6b58359600108ea5c60da172fbd3202434f94ac425d86a3b7173b08853 +size 46360 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c4a07647388f156a6765d4596d6f09181a34033 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4de191730e9714f48755631e546f9f4e50bc991ee53b8f8f9014a749b08e66d +size 31419 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02bc388c53e995a99256b94e26fe0a999af355e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7fc02dff274bae81a256216779c3c2e05410a0f0c0fa88b78a1efc6d2a023c9 +size 28758 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9410315ecc3ee82a921094ab2b67defaafefd1ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d64175483cb9f2f34d279afef11f951db06392935d68c3834a31bbcb4a6f143 +size 74288 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d39ac22ee16f55a2df9fd779d0d1267bb464ddaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c8342fbb1427aba2a8f8c1b5d2877a32423a02f8760c2f623a0235fe9137af8 +size 29988 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4332e96ae61d48a874e712641ab57857e0a8b42a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b745a13ce26007b0e355f5c2c72dd233d47cc7c52c159f7b6e929202fe330feb +size 19060 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c0926113ccb25eae4d6a76f1b546f6c2562207e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03802514753328b3e40330734f24964f440f852542eac54a35b20bfcbc12b821 +size 87794 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfade6989909b35b2bcc36c59805d819d9dd3fe7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f2959473946a444fcd80a6ba3d21b1428ab541e86e87f31ae278f6fe8cd9d5 +size 49806 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b10e7703dd27804726a52bd12d3e8bdae726d4ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82fc0c88a5fdeaf2e189ece8efc02ae321628899ddce159fe27ac361e822790 +size 31540 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcb31b8f294f7c328c77c612466e44c580076ef2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ec1bcb5d5594d25f08fd7146c72d6e33e425e62c4ae3e01b8a5ea5c1caee2c +size 145071 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..447691877fc710a28573ad84989b3cc9b5bcd444 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d27b7c7291a74272bc8462f5a0b59496c1b11bb93dfc5e931bbf91e4386d0a +size 44439 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ed61e68ca9dac2edc73e0250485176a27929996 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:332eb80a7871a036ecbde7198ec17b5efa519bda7c841d4853875a28cc1b32c8 +size 54337 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a577796de197f088661fb91913a1bcc4641aea0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7374254434479f3d8b22abcc621c2a46091381e624c836f884c119a86fc1f3c0 +size 92444 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b35916a05d7065d959ef4d8e1e3b6e677e5e1bed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade3bd86df79168eebee406fc0261be4f2fa9c5b0c3f5e11e90eff9e8d765896 +size 56893 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b575a967a70f5c1d5d182857bcf67bd60f87a71 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178cec81fcfba119782c22eb560809b804cc76c2eb1e6930853867d9e1535705 +size 58266 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48feff1e30576e5899e13c4fdd6395018df43227 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f6b40464af925b2fd7389c9da0b952a43d30f2d74fadb50959ea94001e5941 +size 42778 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7fdae12e27771665e0c5fa364c1192f179758bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20df6c242d476353e289c21b8e5678d40926b22b1fea1bfb6174fe52ca28987d +size 143662 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cbbf9abc51db962d36d1696d697b41bf04bbdfd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d054f2b44726a92d51ff704ae37330e92905940a8f5063e3897efece75594d95 +size 70966 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e3a4d5f29852fb81f31b39ecf6243312d665273 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2eb45a33d3973eaa9af406ff173bf31ca293f18b836e76129a3ba2b1e6071e4 +size 161951 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..806139e3216044b0bbfb58412727937662f7f96f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcf71d88c92941819f6fe65e2002fc6042ce7d28dd17e646ab3ae093e05671f +size 209192 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57ac16036bcccc14b43b02f88921660f759253a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375058a0c3482e95cf01b8ceed75fdfad39e6c8d00cc43a2901e2800fb5f8bde +size 49779 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a8a55e72344b5776a4022c5675fa0e01d482973 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591e865a951bca89a8f38a677cb6d4c0a0c8de5cf1e765bc1215c5b1a92b57ea +size 31453 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0579b297a866e86ab46c58f34fab2d7251dab1c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83374505c2af1a9a3877ca685c0e196298861fbae703015e0ad453ff7d957fb3 +size 35966 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d7aeb77f69a30b43619f6f495405d1b10e1100d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae108e89533109ebae0425f2e4fae1a223ce096c41c24c1bad7efadff936153e +size 29407 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53eec5383441e200ef161cb32eacd1b999598563 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9492e5797e572c0e5e8e585aaefd09ad9c0e280cfc559b3cd5dcb9c952311f52 +size 40232 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80622d59d3724f8aa6bf19b1f9aab0d90fb01c13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97afa252935d8765f51407f896945264da424131b8b93cffcdcc11cb1581f77a +size 26929 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ca504a42a2f55f42fc3099b5565413ebede1c41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0983fc267e2bc9dba597506692b1d6053ecfb159139a526c92dd7a350eb936c +size 21671 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eca457162c4c29c16172fb5810715d6aec35c780 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c30e92cf9804eb944fb9e8d6f16edf56a2a2bd4a62322ba45ef3f0c413e1a4 +size 57314 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26f53c1f337d3e1401f3dd5231c29176b18bae18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63745682fd46ff19ab0ae2d4b7ba4532c18fe3d5310d76c87730aeef6125162b +size 22227 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e99920144decb0ad3a97cdbf23da28fa524bd34 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93a7088e2740e4fe82a30cdaaa729681d285d88ee360830d6f69e0a8948c147 +size 168958 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df007f1d7306b81d4c5acd5cb4cbbc4006a66733 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27fb180d0943463397a7f3cfca3ea83089a7711a97e9b0ee540dade9fb7aacf +size 89991 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b51fe3bbe4d26a45e278dd9d415c4451c15b16eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:887e24dde4006dc447f0129a136d6076363f3d80c1916d31aa485b0428ff662a +size 154069 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fc930b85751f0657bd25966fdad68e7d8739352 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41e2a91e2435ac2390d1ae443b5881464fe75dbe30ec52c5070f8fdd50d031c +size 79271 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4009ac13adaa53e7a3ec94d5f0903b761a20e344 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5e18ba71806921848fe5a6106a578f7020ffc6fd51d205824fb9a92d9a0fe1 +size 74162 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a79d83c5aa1396126f6defd330f969b64c292d35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ecbbb479e28fe46a000f4b39fe876c153de1cadf91d653a729795d78c96313 +size 82433 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1093a5a7374f4f30d5a0da5def0f44e2deeb98fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e04923a46b5dd42333ab9441b0f2160fd0bbc8f4eaca25abd698727c8b99a9e +size 89816 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc99ab53fb925d8dae178d813ec6b0a9dddb855b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba2f929504e2e117f908f36c6f1ac7f21d20c12ad2c6951385ba01541005acd +size 1002900 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3967c0a0e2c1afec64f06595cfd1ac43d33676a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c040bfe9cf8ce430298cc7ffd5293aa4c8219fda1a247c66e6a6569d2193963 +size 132778 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23dbeb03566566fbb75cfd98a3e6d2c220f50c98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa5c9d52d2f2843e6959161d6d665cadd702661697a2cde5c6b9f594372da24 +size 182586 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..458d7bc9e6a919c488e2bc372793783dba5c0410 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd2e4f9c3b7032ecdf960e73752866f7b49806f327a51bca722f816515e2d12 +size 27484 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7261844180edbd0a5c4f47a75a09dbb1e1b41f4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d461ed21a0a452e579f2aaca667f7bc9f3a1f28b10f10b90f8cde983bea7ea8 +size 120198 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a32e6d11f30f57aa0fe91953d19c5f1e96aa344a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd1a58ce39d357eac18b49fc6d405f484742c5abc1ca210f27c3f02138d7dce +size 57869 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5172f8f7169e474765aa0afdeef3090d97b079aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e7bc077d86bf91c033c2e947341b354180b0bb8fa48fea233c3e73bd9203705 +size 25344 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f574847be34d2dd1db3623612860c43fe7527530 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f7f9c12ec2e14ebe19c69ac71178c53bc708c98fc3c2c34705eb17cbe942d4d +size 39366 diff --git a/eval-results/mmlu/0/ckpt_087/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_087/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9bf4edb9b4ad975eddaf6030e9bdb06d2b486a4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:779135b777c1712217063958e16a11e545295c08e54be1e34c04a8ac185c0a7e +size 32957 diff --git a/eval-results/mmlu/0/ckpt_087/results.json.tar.gz b/eval-results/mmlu/0/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18f9d8dc46cd663eb91228135b5609d53753da82 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddc1bb5e7768459313548743b9a1c03355257cdfa1763173fcec3e69197f541 +size 7592 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cac00cd9d18f971a57b3f88a5f7a3e111d654f9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d46f4ed7dba1d1d3abe1b9c3d5f446a7443efaf308525a588e49a58ca3d94b9 +size 17017 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60b3b89df779d7e62c5df8432271775841434462 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3e54730d609231e0d38a45f1bdebb33a277cfc4192c2f2879a08acd9aebda3 +size 29747 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a095bdaa7a0f0f123aadf82ee9d573b158572925 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb308a09c4cc9e9dd9ea725753ea024de00eafba01aed06abe81ebb12464a424 +size 39758 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a2aa90aa49fa2957cc2b70eab24ee9beb269649 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d002567a814484ca4395ca0fb1fffc4b72afc4701a29b979d889fb6b843d3d28 +size 26653 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72b10ed482660de43a74e222670078563133a596 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542b393fc79d20be7ef8feeae38d006f648a6c89769d47739de257661f4bb968 +size 61041 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97bbc7d27bccd355a275f5cbad44de4890cb8e03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:363e3478908a079d5d2d535b9431cf9ba77f574d7d64e629eb96fe721369cf33 +size 40287 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0065e1f4592076073cd978ad6132eed13099517e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293ecbd02b03f86f564ec7cb2882776d29c58bf8a2039058aa8e1b40a78bf918 +size 23708 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80c1ec3c534b491b6a9271e002711a693fe1bec6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8073847ba55c9fe5cd05b12c42937b3fc0eaba378fd0a35392cf895448bb3af +size 31024 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5201cbdec4171c1720ed55c7e0dc81b58ddd2858 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e37850ef7b54fff02eec3ff82c6ba5a654a57c4d299f114c9a95cc81c6a4d723 +size 22876 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38e0482f31c7d4230d9cf1612bd73549ae4110bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48bcc932ffabed4f114d84fc56466f965ede0d70141b33d66ed04d89688dc324 +size 60808 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..309687cfe9df283c00cd415a50cd72bd3ff895e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29397202294182968bc5bf1ada6c4284157620a29f070da312752bbb28db0d54 +size 25612 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b9a58f5a59e4c1e226ef03b4289132abe611165 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8a174fb171db84d00574b9619d28f961bcbbae581f7cad86d7a6d48a129ad6 +size 25741 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a29e7748914effd1bb123c906a463ab7a2d702cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654ca38dbcf64f652032f48eebc814124992420fa8b4d72a677f50d9c59e7ff7 +size 46284 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc652f8bc3fda51c8cc69acc62ea5d327ece9a6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42eea1eb5280b69ae9ede52e0ebe0c26a3b569ac85490382cb3721d6a6c11418 +size 31353 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d061618ee577029b1c5d9b59b81012008b4cd71 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca125a0a8c8a1a6bcb143197e0090231de0460cf2782df45fd4ec34f1edceefa +size 28693 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25aba33fdddee9d1b32ce3a658ec9bce206b74c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e5d559bc12e66f24ee7ea291063e3e486471c8b8d14a99c18734e0ff75a63b +size 74213 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7905d429567bbddd3982121479ac7e64e0038feb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad11a8fe50ff81095a62e11f2a783ae6a335f248f8ff66d08e4f3a7e5c46f908 +size 29999 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9ae2f325c82e69fd0617897a1ae5dabc7581c15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:449be1c11c9e2617fd8d40500404c34563fa983c26f4c5732c160119d26af357 +size 19058 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afd6696fa8f045c0a3993f0173c75fe19a672bbe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4382f47ccbb8c07ca171358ec8e9cbe944864d1f8dd8467e3fb909235c9991d8 +size 87739 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60590398577ece5c6834084e6012d23aedb77dc9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3e4cc9ba2caa79b6d4929d0be35e5908db2e23a8e63e244d333ef91572cd5e +size 49781 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f821981903294c4286bc9c28e5d2046a6e75e0a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95fd5a7f81421eb4cf804b156df2696dd7720c8c8aac92402eaae245c77a2e6f +size 31483 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e67e827e6f2ce57311c72c7dcafd6716192438c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41361baf5a75fd0644e9b0795c7263e34cdc68af310722c593b8044b4483cb2b +size 145196 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..883c301966cec108b539bfc14bae98b1f7a8f435 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c9a971b2b3888e464f3fed3412b4c755ec2c043d6e31b4674e79d34c9dd898c +size 44432 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..079e9d80aa7d218fcb35737ae9a0390e95932615 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9566aea46a4a3c3f038057edd7636ff2e42da1c3a5a2941a07ed9d0d1d4890c +size 54332 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c8d03a29a163b92c86ba2724dd994c815721d78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a015aab22a47568df7a8979fd85c4d52e683203cabe81202ff2347a2953e58a +size 92349 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dea9b47ded660922e65d22142eb971d234ce6829 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44edb295d1f5e2cf0859102ae0525d96136724b37207f899da78e136481568cf +size 56925 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e3f1eeefdc8b4456d16b9c2a0cac4770ed0291a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959dbc9b26f93d953146d4ecefcb26d3a2457388151068bf470046023dbbb1e0 +size 58254 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27b001e76968969487c1c0866736ef9ab9a2f5fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b59e73297b327f8dbe5be5e9d977d0b6b4f7ba0d47e266721be65c096b752224 +size 42738 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea98c26a796d590287e8fdc9e616f92aa7abecbc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4b5c6f6b616055b3d9c871999542f6506f0662c5ecfa194338d00dd43674375 +size 143525 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81ced49160def668dbcc344210ea84e1479112e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321ea8687b0df836b119da7da910b09c73ac0a0fe56d944e31b29e0207eb8470 +size 70945 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50bd245b1cf087d2816bce55193138c28e96b265 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7ba19bdcd3ace39ac9195b3b5c23f90c0486f01e2985d5ca841867038d41f9 +size 161794 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ab22758b6694cfef6c8417d31e96ed38de50721 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:012d4713e5048fcc226901b0ffff5a559b1f760c2064b77e27f4ef47d79195d2 +size 209150 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10daa33b85cad6a9d925863d8a19011a635ae7fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d92911104bb2e355785eb5b11bc53f4af3e34027b3edd260758f70d8b0ebdc +size 49734 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0fb639c8163482073c4590a7e0f056389d2001d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138d8e3f62a74efd1f364cbbe0cea29665f52aa34108a6ee91434de20e89151e +size 31401 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6429c4eabd86d21d3a40a0bdcbe9edd3f291400a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cfb399ca14bc483d76af281474ff02d54052171b812c9226da0abaa6b79aa6 +size 35925 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8fb63302724fa5f42db0f17962552dc6fd38ba1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8db916e492df26e497b28d65fde84fe82c5476e13f6a9489d6cc65310852d63 +size 29388 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba4a6aab09e5eee649db08078281ad172027e4a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204f8f42bab18930347a271988682b9876f5f600e403aabc459667dcc6ec357d +size 40206 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80ea8dc11b83b14e3ae2f50f273baabd1f26604e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdb1f9e1d9a6e3fcc9dc0bc4c90b22527e7481c4b2e6d80106f78abffd8fe04 +size 26921 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0601cc38dc92e5d4e192798cfcb87578a6c9406 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e2aed9b275c3ea6fb2f91f998a8f634f3b9f5a71784037bf1990e8828117a21 +size 21634 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53de14f7fe227c61b515e1d9c1b7505a3035fd67 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b993e64f43bc18f529e2140ccd9497f483332dffbf02e0bf2baaa2603f60ae1d +size 57316 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8900465d3921e315bfe6d6acf19a149b356bee50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede9b620c5d2284b3eee89fb0233f635a5a72292849c780df5a0151c454dd803 +size 22205 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..501064d88ddb648c93bf5142b3544db18b906379 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5422818af15987de8c4b04769b998a085aeee25f3701cd40555e8db00e4faef +size 168897 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a69db9ce7f4991a9fe0a661d3c86876b593dcac3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37fa453723976c9228b231b48a85cde03558c9f79d29257c41ee84e0bdd8d67b +size 89876 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..129dc46c8103f66be3b69c22567c1a1e6432254d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1d5c57043b673651d5345c2fe90cdde20ab55d29aceaee22bfa974ff0b90704 +size 153457 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf99f254a4918c6b8e6b12add917fa6e0f7d9b1d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87f5a0f62271bada614bc479cdec41225fbee8ce3e6452303e97e7f7a154822 +size 79187 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bc8e89e75c624cc500f6c0c08e1ec0d63dc8400 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92183a2f640e7cb08708a950bc8f4aabdd7d94730b446ac16c531255ea817940 +size 74081 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8dd33c2451a05735d3286365ac9709f6eda91505 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8791fdd151782ccb043aa505478ec8a39b49164e6219110a91e9804b4dd2edda +size 82338 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bea3e96d398da25fdca27776ab29b122314669d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e6b939fdbc20711b0e3a316c2b7a98773bea7b621b93f9b88339e9d5bbce30 +size 89703 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c40a1c92da41dec53f0259ac8b3a02aa9181d3fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e5822c17b3006a98306821efaa12bd9c8296bfdd7d0f49c20e3e9a53411e75 +size 1002138 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..107ad30f42a8c6364aad47d5075172892b2623f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c08882cc3e4043148ee431db4c5759bacef4fd2728f93394658fab912eaa7cd +size 132689 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd597dee143542d9f03b63367718824b3a55533f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1fd2e82bc3e96b721f8ac8ba589b76777252a9e94cf2c4208a052527106cc8a +size 182528 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..775e752e78a6c55bc8fb8415bc10dea7b6066eff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508f66c2b0cf1642253621ebd7378096613b3faab31748c6aee8085b94ced562 +size 27462 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5d1e162616e08528ac93836d2857889ba4ca8ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b4e6da42e13a14e8656cf11b24508a4094abfa4f64c5cf13b0a49a086abc68c +size 120098 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed076fd354227fc5fee788baeceb89f751a6b5a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3023686318b727ea063b9be4e277110f40f2c255cfd1354adb955635aa03d0 +size 57952 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd8c3dedd92062eba76a3c1913b7095a1f36328b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c643e6b953b8691b8f3cb6f88388b5a7b98871d1faa923da5fa2b46edc3358 +size 25375 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77712a85a60655b1137f58ab6e0af2a3e7732a9b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7e8efa1a2e5a93890f11b27f9c140d5a55c4d6f8e97c23688b82331fc0e023e +size 39384 diff --git a/eval-results/mmlu/0/ckpt_090/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_090/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8c8ef051785db2e35d4f7353cf54d4f2b5d617d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d33ee00728888404e988faf02f7f5d1b7bd10648261d22393ddafb4b0d45d0 +size 32950 diff --git a/eval-results/mmlu/0/ckpt_090/results.json.tar.gz b/eval-results/mmlu/0/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c784a7c9492070d832a18e2df1dccdc84c699862 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad4ff89ea3ca7c94363c118424c192feb64521b379d009a0256f22b99ed53e32 +size 7592 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37ab1f8ae67026d3335aa8985df2ac57235475f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01101d323651ad724dddd3de56b861c726e410bc22f248c8e4b74c49d12a8ef4 +size 17018 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d33832828ae9eb563671e9a8a8158e7163068dfa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccca79bc069d2beefd7311d5af424caa055581dab933ec9d39022499e44d8398 +size 29788 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..091f258b78ede466b3c7fbf3dd31dfa86ce06a9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4307b4966bf7b4a08264803147ecaf57556bd0892b2afb3eadcc1eea69e39a +size 39773 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a85185185ea5c7997dc2dc2cb5d7878f299ee5e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a19b26146aac4e26727de421c44d409e4af100689ba4adaea7c30efb02be600c +size 26715 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b50d63e46f86674cfd0fd2dd818b3f5fa778ffd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d20b823f027813a006c300efe05d024350472ce928fc798958c90e49bd315497 +size 61173 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c55f1382eb85388afd564c5a406df9b0c20a12c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ed407c15b69835c1e45065a744eacf978b405d65fb273a74ae4540f7e38875e +size 40315 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d5827e69d00e52bd8c6293102f27d4f69d835cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f8c865b5637231863b94b86d443f70d19653a11318c17eaba120d3557e6e585 +size 23730 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46623c8b8f52ad9c16fd852b4e526542232840c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5e01d06ec21bd3a4ab7d79f97131d5dca001ea018f9530fabdf4c8f2c86af8e +size 31028 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..735cb4c88a3943dce098528b968f5488a0b752f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f447163effcedf4395dceeb4af3ac76910ce46eda1ce2c4401baeaed72523d +size 22897 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6bbe42373df83516a0f11ae221db0c49e1ac61d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3412a2bfc61ae26dd8f619eca1a9ab4aa6403fd3e18575b9e21ba5cbbeea05 +size 60830 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10070a8928b850178711af2284cbd95e2105adab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc87b0e87893ec65c8c008d4e0142b761b42bbc57d710f19030608082b13b179 +size 25651 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d9d417bd97aea2a75f0dd382cf087ea63e28368 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ecf156005ca64c9441d6a85c587180fdf5abd7d0e7ad3eda2e823ddcc1f36ec +size 25792 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34d6f3939e293040223e7230f4f81d5e716a8d54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a1f46ffd60ec5ae7c9debe83266391fcd4002e87b1cc3d8523b1e9337cb123 +size 46416 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adb53e42f38de8fd78ddcff42c3b9143b01c47ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91023a5310f8c5f65ce005c2b5b0f0ee1b11f8415f5bc299c9cdb2d7b2ef62f +size 31403 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd8367e7d74cac55230cc3572e17f036e0ad92c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb7cbdb716b26ce948773202a21b657083d464ab04b214b5ed6e4cd02f8a2fc +size 28752 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..125532c846d3cbd198eab5c0138151f70bdee8b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170244d23357f29276eecd6b67a053dcb4b1f3dc85098638c2d3b3da4205e6f0 +size 74378 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ee937f913e5fe1eb47196d108fe3b01f598f457 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:360c8d4f7e512f521eaa29684308e076d209d1b3cc59d890ee8a7c8cb4d0ccb3 +size 29998 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bdd88f354bba69e471931f1c10230d8b31f00ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a8f42d2c90ca822061a5c0c4c8bf0557417d6a12bbccc809e98d7809b18137f +size 19066 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95030bb81bea5275afbf83a8bd88a56d0a5f9ae8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0120de0cdf13de1a7dcd327eb5c260cb66a4ef1800b03cff10b3b4d8e3f98a +size 87873 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2de2f35c5ee7f0b2dc87f71edb866792f2ba76ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c98c9607c88a2f6cc41f3c2e06145b9f0e958503a9daf3c2c3a6793c0acbd26 +size 49841 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f3d3d20b7a9b1e5f011b0b510d0d6b8e967e0c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923817e43ea65013da4ba69b2a02a158cbbcf56a60f8c2433958b3699cc81853 +size 31527 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2b01d44412143648039d703a854501fac0763ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5045bc7cbb903cd73cf0f259e9dd87f505e2389ef9f77f1c28cea78ad4b6765 +size 145276 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a663964601bac7d0ff849eed6b3851fb69b43c1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac256e4df78b8bca386fe25fbedc659ec50b6f9e36bb77530bd656965e0a067 +size 44535 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2de75edcc71c7dc5fda43b4c5d4729dee3254ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c663a8e1a48207d5349b025f821f5fd8417d044d0381802a9f5abdbabd1d0e3 +size 54330 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..287785ca75a80a1dfd8902346fb3c55b8bb872a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b93a7a2d9b2c00b7e3fbfd47b5fb915134cef410ee181a398373355b9cfcc27 +size 92501 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33a952b00590525f04d33517ff208984bad9e8fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa880eda2c1cb850c73fe0337db52533f8d8bcec979adb38c678397e5d94a84 +size 57026 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a38690f3cffea61f5388f6accf2aa8901df14639 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f01a5d0b8170f061f0c0960d4f69d7b0897686f25e754883d431bcc8c918698 +size 58361 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ef9d81efba4d5d61f304dbab84cb756c5f0fae4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7d3d30bf41c44c137fbe9ae98631f95769e07fabf7cb76878863cdda6df701 +size 42759 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17dc9c61d2256b423d86d8ac4ec3e489920352df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb312c78ffe341559880cd73a7974237e287d6156ad4dab849711679b5d25901 +size 143757 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..204e9bbacf588668c141a196b9f83440f4dcdf3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86c08d091bb0ee7f5c373abee005cf14952620f0103860e74304b1fb3697f2e +size 70986 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f251ff40eb07a4d5f840fe3a9574b62ba5abd539 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e9efe6cd0664da9c63037ee5a7d876044e0ef14fe0a706d818360072967ce5 +size 161858 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c24ca90806fb4584e24d8f17fa3795c383593341 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50a7f3dd6c419d11b513c76f5f60d34c53af6831813d756b6f38a6a6fae32ca +size 209297 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b8c461b1c7f9db3c11b129687a68246c569fe03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de9cf5777b93a86cdb2f1993d5097ce6e176a1c0118bee4fbacb5e5aaa994532 +size 49744 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..333248dab71a4fd21a4801700238959f85680c1c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3fa928a8b0b339321701b1cbdf8edf93ba95f13daa3064a42dd92730b11a3fb +size 31468 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e38d0ad1d40ce5e3de7e30ede2dc7f880264fc0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f638d732349e46b3fe24bb03908a548e3093f9ff45c2b045c9d136b617a1a067 +size 36004 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46d148782f72380f57a2e532f5aa9ae512d6c89c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49546436186542d5f2b845f6581c634c5cc3f6f1928f4c5a84a30f14011c388 +size 29401 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bca87ae51aabd894a92eeab8b9b14e5c0deb42e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c98f5cd45e810172634d725d00a891864d797a1adc31401b4d9bcf0d815674f +size 40271 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3ea01635b67248181105ec83ead69541e667e41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e000e97a4e6c735b8e7d1828928cfbb5d3c1780e6ff7bb3ea4d731b7537309 +size 26980 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..368263fed6cb4a2f7693bb30af5c45c960ae3550 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc64ce1e14d4d1690f6f3489b35783311ca5bdfa3c13ee38cf5af07c91d2ad49 +size 21676 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8dc0a6a2884038319593ae4a86f6e63359cb9fbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a47c1a49463dca7e22aa687f436706761accf6e0972534950ee58f887c7e5ae6 +size 57394 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d19a2c68957e7c68984266558be78c57996fbef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccf2c4390c0c96331995f9a61169c8f2cd207af9fc8b87652b8b49650f20a7b3 +size 22217 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df4e6e1a8102814b90a2cfa4fbb2518c8d6c2151 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604006f49d1c8a8ce766dcad568f2b2359295030ef43e1191b76485ac8bb085d +size 169236 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9aabe1a90275be42fee4fe2519f37576f340d522 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e6c4f419768ef403bae31c95ad2d184e7cf1d72000d745e7395539905fc0c7 +size 89988 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c29f02653b8e5adb2056ae05206e486594f65b4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826bc943968b33b253dee2428a1e6d769d1c521c23ac62d922c4b5c74eb61423 +size 153756 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fadb161ae6781b9fcab522a4aca8f93148acf27b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4272560c83d4dc36289dd9b20245bf5f62a219f0711f6c927ad600d190c11114 +size 79300 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8551ac3f610641d2a6161cd53518d3c310af1479 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12ee1151496c0faff3575d63d68e831cc20bb87ef16bc57fa85a4bbfe232a24a +size 74195 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb4f1e8f2f24f39c46445554c98cc95fdfd45f1c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c096f3648ac1c4c4553d6c41ad6e455f08a447e0ac440a144802af7aa5d12525 +size 82457 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04d77919d25a5ca754cf201323d3e5a5b298a9c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f80c944093157190834f4154689285cef1c3528e6f318d136c6fe8131299b2 +size 89749 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae09181012784e0d16bf220687710d2e10cbbb29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6928df1dbbabf262cd7314e1435ee681814fdf8d6d44d53264c1a11bc08a45d8 +size 1002927 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3fed0abef175d3251b3f3d502d767e9f1e1f0a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8694e72000d031806fe304c05de559732de373f253e1e82cada094cefe27f55f +size 132693 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1f9019cebd6782eb5e031ab07efbd078d953e30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8de1e767071339b8ef8eeeaf8fb7a8ba09b4a45899236ecee5d65b40ab1feb7 +size 182685 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f710839f01011f61281c2f9fdca4f54dee73fff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4cd36672c9bc72f756f138a5ba5a2cdb440f3e6562dec40c837883e7676d1b8 +size 27514 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f0a0341c7478f372327adb9151178065eda20d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b417f71dd968bb43555b341043811cf8755edb0c9eb17eaf515e027765ba2559 +size 120203 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4d6ad45a66d76d8d187138a543bbb26e45cc6e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb96bca6e7bdb77c3c96e3e49d22c203dc25e94c23df00ad43dc5e18d7965b54 +size 57976 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76dfee7fb1e41c0556c00639f4cddd1cd313e61d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23356e682e4407b9faffe9dd5a9b15639f1782d1430ff4601481170b34b49414 +size 25364 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c83e8b15af9478c4695bb49680453ac6fcaa976 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9555f5c8657dbbd2095edb5cb4e1d0ba3cbc3257a2602f8e6531135195fbfc +size 39425 diff --git a/eval-results/mmlu/0/ckpt_093/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_093/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6357506f75e8c53f036f9007e520299b8f331ca1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606cd0745b10bae4594a114a369571caa9ce44db338cd08312f65f8a48f01351 +size 32967 diff --git a/eval-results/mmlu/0/ckpt_093/results.json.tar.gz b/eval-results/mmlu/0/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea07113283ae91a9bc030937c39435d83b5d5f7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fec54e4f59b38bceba33729b1d0100ed5e0a05a56a2d5cfba34a2a2d992d8021 +size 7629 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e5ff477a2dcf05711653681cdb654c359e16268 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:430c0d509f6cbea76f9947b6e750d172dcd079af0ba72749ea3c252d49a7f324 +size 17003 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec61c3af71d56bb615ecaa3bd1e74726507685b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6187fa9a027fc41e3f3024d4147e11c589424d51692978a71dc2a86f8dfb3e +size 29743 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fed4197ed3ed75970186b161765ae5bbc722126a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80557acde7399aad3576ca4ab7467342048d0b5b0040afba3a247db3508824a3 +size 39760 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ad4c43fdf8c6d36f032136769c0b309a6fbd6e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c760e55c0259c88bdb631ad77617c769cd5f866f7e0106490164ed62608aafb3 +size 26713 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd364873b6d58bc4251c9a06c86202aab499f6a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a83140c6542fe18fa117bfe50f155c8aac215ac614573092f4f0b504ad1f46d +size 61072 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31c9e125b54bc9e8629613a13588c8099e004390 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5df8daf2fb0a829424068c926d0706e9e2fabcd035d1527909a4484e33b720d4 +size 40272 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa27107b0fa43fc084bab5466df8095d9a6ed6ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8ed09a8796a657f84c30a1237189b1962b053937bed5f3b95b0b830f78e655 +size 23707 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56ac4e7f7bfb46e3618b0adc0af4af92bdbf734e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd4fd188fb05c5407fdd2c93b743cb977c59fdc0a1166a4aa77a821ec544ed1 +size 31031 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..990e59bcd60ddcdffded57bc8bfd2161a3011d52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a2b8b6492078d09c16d507e58ee6ba7beafc6cf3ceeea045318188af30abd8 +size 22862 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e374000cd9eb44c70e9c31abc0b09dd48b41048a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455cb1e100ab229ac4fdc8153dced3fc1228e5e09c5088b2575233449ce706b1 +size 60796 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6c76953ee1f4d50d2300c531ce1ae683ffcf7dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d51ff4e634b75d8ac8f5ea6bd3efaab3b77c1c74f4cd634e949ab38c9f9d581 +size 25685 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfe1a27e3cb67639ff8567f17146f8aec8773795 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea54f368803d4250faa25d0f55ab2e2c52d9ebbcc40df0751fdd0858c1c0d362 +size 25720 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4afd3f78eafbda88080e67588a86cad980fbf3f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666df1ad1c26b373c00b3fd55a78cee774077c79f51302ff57df422a77b97039 +size 46383 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a766c3e2497b3b8c764995bb2515a06f5658fe30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c218c4cc62054dda28565c8adb61bf867b1f1e53d96282f6ff539477830f379f +size 31372 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fede3108e59f6efa422aefef009c1df4a62f9b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e477fb15ce4a5412ddf497b49d691a5762774b242236298ab6f3d1de5d63d2 +size 28717 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4264f68538fdb9af4f24251cb0176969243670de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac6bd6b5f018bd4c7b9331561b1a79d1a5ff34699ea41f361881c41347cda11e +size 74282 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f43e4b013562d6bf2102a7fe9e58efb9add78bd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622cc07dbb4a3071a5b12f889f30793f28f07e1d5f06ae8390f6f89cf1c5596d +size 29972 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ddf863c3836507ea72b519495836c14ee15d409 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d308d3954a8942fe8540feab96a3ac845f9da99900eaf7f22d3a324a462287 +size 19065 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3ebc843b098672cabcfb8de1f324e0ef6d37f06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607ca039a283ea579fa2afe665dfd1ff1e046a360a87f10c74e40113a0e5064d +size 87741 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5e97ef4c8b305e852e133314c93505f82120621 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:536323c4c36adc29bb5184c2bd9ee681fa672a564b88f8bc21b660b0d7501fc4 +size 49819 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81b65310f0449e2de9c1ff3facc93a69bd3231ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ebe10c0a84ad6333e83fea51235f0ce0e6ea46535df4ea1f5790cd24c16d48 +size 31508 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41f570916c15f20db0562e7ef6c1fbe82d9e3a34 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1289f246414eccf8b91f41923e9cf5d905a1d4ca0f8baf082e332c171371e31 +size 145099 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5aca61fecd67b3c55e245c042710bcedae5a6746 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e53a89f35069f3c609c67772c071d66c657b38177781137f85953219a0698e1 +size 44456 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79637cc9ee3590ab851e014667aa7cdfb58add4f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf0e34b04d66ee01a40d3e6e3170e03dde8eeedcd467ab6d25270505ed9c7971 +size 54323 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c3bd61b5c1438dc0efee6408763a2abd7de3345 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb71816eea9a4fa38b28c62767d4d853642254e34ae30ca81dc6b71400200eb9 +size 92413 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56c789af682a344c7364078a9b81916e96c36700 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f3e5522d89504254bc85452a94c107fde4e4796d8d4202d62bafcdba009fbd +size 56852 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84f45ad12fce7dd8ee28013047be14ebc3da81f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe564f6fa1ac7661a130bae19406c1fdead65c70d3fdb1617e27ef34ec235b66 +size 58254 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..977a03c0cf36ad161d37e44ad60fffb21066f557 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58e79801724830f645f1b2225338d53e8a7460c489d30e2b0b47b904de193eb8 +size 42735 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8775f7ed3f06c793153f0d980248e6237bb75412 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd361d14e8c430500782d0eda7d3dcf61ce881c9fc7b7ac56fd5f380a863a4f3 +size 143541 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ae83ed7ad66f09c8b63edc44221470999ece6aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9aafc6d151b168732f2a5f07ab158ae3ef4b5c21dd741572725155c676d465 +size 70875 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14f16db58e9ed95315d9bcdbea9cb4d02ed9eb37 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da7482f9d0827536e4ed81d65cb26d05ae2b6ddb4d2272a744d5c79476d4de6 +size 161759 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f3a58d14eae07e3e269ab0111bfb0bea297805e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0e6e569937b56e16fa1ca81e72988c220e5bc10580fbae692d4133db53e327 +size 209013 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b4035c994f24bc66ea59eb5f469b3853b4730fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37da7876602b48e71269a986e214fb583d9e48d63e62a649d2b4680f422262c8 +size 49778 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3554a156551d23eaee391e29e49be104377fd29b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:209f6a193faee2aedc198a65fcb88a04fcbeacc8579aa0b54021307f01e1a367 +size 31464 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..730105320ea90d4b818bd0499f259da78c1fa02f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30bc67a5386d8f0a2c923188a1c9ec50e0d23fb8595acf19d77a494bacde7790 +size 35998 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f728131875cef7e54faf5ea45b3bc7953427bdd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66791d330c626b0f8157b5fc1df8f17006e0b557f4479d3a9d40ad0a18ced77a +size 29387 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2465f42881053fd2d9e6216e9dfea3cc1d5abe6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c123c11529c832f2c5f6b2681e8edd49884d86e99853c3ffe6f42423af48d3c +size 40250 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..226e7e6bf4e3c4c8015c0afa33e632202b5bd302 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3907d9f04c18a40f3ccca9b97a52ea97d7fae9240c90b5d49ddce330929445ed +size 26934 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b76b5df7d9caa4f964542938c0c247cee70aa441 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23e4a1ef031bae1a7e30a05286e716240e3483dc624647ff12bf9dc0a92be9b +size 21680 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..106c290cd9d8e2fbc591e84c441bc473c5ed26a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7084c4092111cae6175932fb5e473830c9d323f8d6b77150b6e21e993118ffa +size 57355 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3f08ee38d17114e25e6a7ae8e970047e4d98a0d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d0e9990c3f569c702809d8c50aec7f043b0c7ad64561dfa0a67912879bb648 +size 22210 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4a44295a7f9ce5ace5e6a3a1e7b5b49730057ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88aaa02e2e79bbb25542b6c3e60a9bd89bebbfa9ccd33c95311b44f92ecd2d2d +size 169079 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2337f2d2593ba1622d9af496a1b6b3332fbce9ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4572f1c73e5822aff2a4be030127a4fc2cd545644744d320a0f1cf9ad248cc41 +size 89879 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe376ecf6a5dc2b83f08fb803f96d285c83e0cb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b394c79550f528d5a26c17feef654f40a1b319bfcc927170f3d200914cf213 +size 153577 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e158731d8a39d76d0a573fa4807880168341961f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b09e956e5c983f33556eba6d5e2b40d16de37efa6d4f46cca52830093ac3355d +size 79235 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06ff2f803622baede41b513b63f6c84525899989 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92c13884b1c07176626d947c6229d80fa16c7afd966519531d9152e57005a0d3 +size 74111 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfa00e233575b799155aa7a8e1534a5b86632f00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e219dd7b09d4c84fd5121bb2c35c56a48dd51b82340a19ef35a7762c4097ca +size 82356 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..320f7554af8ffc37531ae58eae8f31d024820570 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ec59081bb93870432955d48b4797c9bba74af7c24129fdb000be266c456621 +size 89588 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ed4d506c467d37e136eca5c75faa86dafc12b73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f283037e2b76ce60e98cd17e08a4aa41cb18bb76eb52946382b01b1ebaad0564 +size 1002654 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d5d275f1ddfd418729c367ea2ec1612b6986ce8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890c6785ee02c9f2b337c63c927e7a54fe8e9b2715835a286a0fc2f118b7ed54 +size 132611 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27e4966e5910c5597886b43471891a2b1b2a1f6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f4bcef49357805451f38d6749ec608629a8d31ee3c7f539dab5d20fa7e4276 +size 182438 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad777f43bf21b98677a717fcd87f9496b91db8eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe5641515a95d8531011ac85b43eee0a53e3f488e2ffd0aef12efd1f77de08f3 +size 27457 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f880d5f51b266f4ab76465e4d2b4fd49daff10e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ecabd67322efef8ae418d1f229e08430cb57cbae818d9b28d7542a30743ff1 +size 120067 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e9720f98fe970da9bdbb120e4481ae4a34c2df2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de08b69d09fb6eeec23f4e2ae88fe72d804f4e5148a0962f7b82e454b8fd8b6a +size 57926 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9d736375d8c2b8a16586158827bb9725ba29fd9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad7b4d5bd6468206d0d4396665318b0e4ca8b8511b3ef2e8289deee12edda614 +size 25364 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cd2025cd89262a467b58a7384ec819cd3925f62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c7997ee755a9fcb1facadedc02f0ffd20e9b8ca577690617d5226e0154bb3e +size 39361 diff --git a/eval-results/mmlu/0/ckpt_096/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_096/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6fd5003e4cf7840355696e0b0428869e8bc03d61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6bbf980b32caddbbb2512d05b520ee567aff3a3df9c0c9e9f7c773ddb639c09 +size 32910 diff --git a/eval-results/mmlu/0/ckpt_096/results.json.tar.gz b/eval-results/mmlu/0/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09dc7a683bed3f80e38b6b91d9e379fc3083946f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5899c262ad419401b2a50ba28f30a58d89fcfefc3792de37ca8252446fa63299 +size 7616 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f932b2c613f784c32af2bd8fa5b22f1e2258561 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a126fd47ed426403ab04c78f4318f06885f71cc370a6c5e0935c907c5798b002 +size 17017 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23bc607ef387c69c297173ca6ea0b1a2d2231a8a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a548c788d12a12bc5d650a0b54daa04544ca2f821062eb1e2ae72c44813edd +size 29761 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8afb89a6bee2a043ed4163ace5e728e8015c2e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e83aa2c49348288cf0bab1727524842a0d00c7741fcb087a5211fe81cc1713f1 +size 39748 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e15d1e9f2cea1be7e9baea79a5d7463bd46600a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed14e73269d18df59c179121294f4d05628651eb44f889937b0a9535ea42d77 +size 26696 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03142cae1b60e3dc609dc7bfc9bcb4572d8491d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f67d81d6297887db825d505d10a5a7edede4c571df4a23abfb1b64f042cfdc +size 61091 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d00b6f312497f57d53d24ec707f52549f196219 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0476f3fb6cf40357af5aa89a49e28451a0acfe607859da65cba08969c885ccc3 +size 40297 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..527d99775d401ed378f291f80f824d88f526194e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a65884e82b84da85d09003afa423f6d2077833f537103143dfda4ea699e149 +size 23730 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2a7dca7ea65be582fbc10373554e30943b8dcd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7670b25a4786bf40133cb630f74fa1f0be72dc1202e34e43f5df57d2447ca6dc +size 30999 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41d896f7eeae0235b3d94e1ca4f6534bd78d9acd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc18443a736b7ece3aa983e8970c2243ed2cde38433e0a6163f73a470cca205 +size 22879 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a378f0210d2b4768d6ed4ce1ca2b609966a0f385 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b355e8390c9e64549b27d535e3899614f3226ee12c32338bdf56ee06eac9b97d +size 60846 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0a424083258c28a6383a89d20c2413890b3093f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b1574411633d52736a156490cc7bc29dc7c439983443136341983d9e3c094c +size 25686 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b458c6ca8ee2dd3922353604b6d2193336ec3eba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f121c6b71faae0e665c67feec71a180bcee807ced9ac71472c786529dc03db2 +size 25756 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39ce971eb99a7dc84bf2108115589326623933d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1af91b7fa2b215d2030c3208c8066242bbc8866f023e7e919368d22e46c07b2 +size 46407 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76500d599fdd6bb36af9316c1ddbf74fbcb77999 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7953238eceddbed848cc3a3b869898ed677d00205293c0884eb22abe95c4ea +size 31445 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8fb5e6270ab4f7df246c95b63bafdf577527f83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ca9972f8c54a0da932dbec924b021987c8b9c869ed79e0c05fd7cfbfd8aadf5 +size 28706 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5d83132e48179bda13fbbd950611ca947c490eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b878d23a20dd69fb330acd0a5a258308252eadce771c8e06fd3ebc27c1597e5 +size 74263 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc79974148b90621d7f4901134c15711c723e017 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215e0d8d815364b018176403cc6ec8bff7389b8b171c78a6ea99fb4cf79a7b88 +size 29982 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1a6d992bfe2e7ec089635354db92445d1b13377 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90b3bf9ebcd9b72923b0dcc0b907f0248c1fd6025e309dcd419428ba7eef9bf +size 19071 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47a9d8d494d39c3ce8ff51cb71caa785bd9fb83d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c1e13b28e52b91781feb52cda6df3d5cee046ac8a5d8262e90f7586c9478558 +size 87879 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1984f6291fd32d1da5bd32144a3b7bbc7c49b87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:441db73e77afbf85905e6c9f793082c6295122170a0f1ebc166d768c42b7ac16 +size 49786 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..340afad1b872c1f3bc7dd22810b9550c82fe89eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434911a3b543991d525b5c6dd045329b31d4f77006af4e848b2350f204eaabd5 +size 31523 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..542cd7ec7875784840115081a3be566004af2c11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b03e78a19f1f05072ce93448abe67e80d8ba462ecae272df9d48acd8a94862f +size 145192 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b5de412c50f7366c114f6f65f60ca16f544c916 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6932bfed1a7ffe5f14f98f2641d309e6587d6f3e9685e324ae8c82ebc15c3ad2 +size 44546 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b99eaee8015cbd96d47c88634e43d8c5676a615 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7aa28ed6751a48d3f20e59c5f8c365e32558368f92a5c90eaa2eb573a4e6114 +size 54328 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..677ff302adf69e08340a1cc3a6c99493ae91cc3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0186ff60e1587e51ae6ef45761330e9bb438d464a8a0d1c2bf159d0057dc94b6 +size 92401 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9cce87017c63bebf40546c8be768e237f369605 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f67b2c297f6f30460c6184660fde887a51249b905c9b8d4c2592102f8205b462 +size 56961 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b85b8140a34c580ab220a84bc8657ea0dfd70a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5e19c0865de2702ce17d7250bed0d96f7534767ba73d0456c07d1c5a4ddd76c +size 58253 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7d033bcafb040bd632ecd9e7db64132427bfdd3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f087a877ddf506a46535d3b5a9a5edeafe8a3ed976709884a2417cacb1fe08 +size 42761 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ec6403dedcb82bc452283fa84d1be8b9281cda7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7898989d77d9af8ba8dd62b44c5c09584d9ebe48251e133799fea376bfefe81f +size 143768 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdaff7fe825e9a77e8cc6607cf53ed5dcf7b890e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde1c8338b15c13629c910eb9c9497429938bb025aae872066ae9edf8a5fb56a +size 71057 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20352eba8c6056e7fcb84eee81a82026c373fb20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2607d0ea8a3343447870c43f88e00e4fd8555a9a80c0634e43d621d37f510785 +size 161840 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf35274251b9e2931373e3a9dfd870437a68a04b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1dbdc57946f2366e7b442146e479e487e631e5bb3f65ed8707f8405f250e349 +size 209255 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a270d60d3417c3726ca0323e44f5efbe080570f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8ce1c9848752a6efa9e3eb6ecec2680078d5b538eabc8e8688f9f13eb1f052 +size 49796 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6167bd2673cb0ccfa7df67d7ec72f4f6861fec46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:514ed7c2362e044c9eb69bbebc84b6bc1eb6445be2d7d641ce5631cdff923ca8 +size 31433 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4018877e6694806bd8d0411f54e7f51613951202 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51619c00d2468b85576fbfa61bfd1cd3fc58bc74132b1c3bf4cb0f6ede684f0 +size 35963 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e66e35ebd611c2307c26a0208be25e7012415ad2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3689a8ce2ce3f0e85395585e3cb31eee3ffa71f9a2c19a0dd460c4d98d4efec7 +size 29425 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9783843c3c3b463ca0b730cde30932b752e43986 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a221e772baa5840136e2259030cb1ff832dac0c8831104b95b60fcc7de10a7d0 +size 40265 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c1e4fec5042039dadbdfc44fd5393a12b45aad8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe8d4e3d81e3a1de1913f0c144212b7d6850b56ff2d2055b6b5798e7a4f850c +size 26956 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..626c241065dbda97956c15c01f30bc6850cd06d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ccae519d61b06ec5e74f868a3c40c99b655ee8518408362ebe7dba74be2e76 +size 21689 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f81734290786fe992a276af02b1661a53fc9bd8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0151c03ee6b3149f175955d4ac4a9a17d17ff41b744700a796b3a1061844649c +size 57331 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6232bc174ea418ad48ea086163bbcfbd07a9ed3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c67bc1aa81fa97d5844ca053fc49060e82cb167dce1d12f4d9f2350edc5d2c +size 22250 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17f0bac8adc0b934850713da5daf63feba84a4dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db4c2ba11b1fd936ff58b31459367de3dce63519808de612016a79e2ea756994 +size 169165 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eabfd860fbabd6e51dfec937c322f04b723494ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50b394b252114c34d1bb2bf0b446b74bed7c1d87e47e7e86135ff5e518fea08e +size 90037 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21c83231bfd5ee2836a1cb47a9b22a3250259ba1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c3d27512aa652a24de5659d24227d5699693550a3be30dff5e81ef1716d943 +size 153771 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77a1c09587b86e0fd95e601313712d8ba1a6660f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4f0fd1030820fa2dcd0ebd46a3e2831a8ae480cd568bc14341abf2244ae98d +size 79249 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8142790597a521498b1023258870206fbb50742a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ada070c208548a554a23e49f691ee03306ade3b4f45c90be32b797e12a605b +size 74183 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd32f6b818ec5c0f22174ea9d0e6ef31abca7a23 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba988cf368c8c49c223b88e0e02673b2f8ff1f0de70f3d7eb9cfbe00cf88bf56 +size 82434 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..995d57aee041740848c85c4eeb53330d2347004d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9afd84b057bb8f524eee4860ab4c01598b09e831d094e2a24bc170ff84fc4f5f +size 89621 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59d36fd6fb12f4c2bd659183251f3d84318da461 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725435089d5590678b09f8370e75f2df89eef7d5d4c9c7160cea051576a8ce54 +size 1003024 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67a2b885b85f90a5919997117151cc0c7e2c2aff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d10d61b5538ac6d2ee0c4657aa491beaa320f4e713c740e26044eec7481f0f5 +size 132674 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a13677363d29b1ae8ddc946074e8109cfa4b5bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92cd3b9bc3bdff737cddef12555d333a1bf0140bd0291bc8bb6b40b2f9b86cee +size 182605 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acc118ba787e38c70b1cbb7f9cdf74c580d9f710 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592578e4669f5c987843d073d4a081fa9dd683e72f3a7cb3c18e9f25a856bbb0 +size 27521 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53e8aa295e4367ece35137b41796d8774b556e8a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2f4b225365e25f49a0f59a09e1f2c71830133b77863d6659bfbfcbf1ffd5c8 +size 120160 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82a8a716d540d1aca99215e10b95ae45b09538fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5fd15a40a3502879bbf0962ac7bbfce4f4fc14517e21a3219869d8864c060e7 +size 58019 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6bb9a85991b73b880cc88be65c83fb7a0dff289b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092a4ec81ed062663c172072598d331543a3aaf23c97abdf7e975454c952b348 +size 25435 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3250ab44f06725dcd890849fd64e0c95704e80c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b775fbe34a59fe3909f9d10a3be4af903b61d38ef482cb74c2566ba610c35f58 +size 39381 diff --git a/eval-results/mmlu/0/ckpt_099/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_099/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6eb86d4e57c1b0127abd5b213c22d15a1ab5eab2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f78daf264456e9c0977a9159b52a9896acb27f458c0663dc2d5db2f847335472 +size 32954 diff --git a/eval-results/mmlu/0/ckpt_099/results.json.tar.gz b/eval-results/mmlu/0/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ab0d74a246c818de54a2cb7df2d69253cab7384 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0553e05b7c3df193b8d1819c9250c733911a86584045e8df3362623a133b4d02 +size 7585 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e93f13f61c6f4e5d17a31e0bcc36ecb67c00f42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e97ef3724d341f395846fabcba2fe9f910cce23c3ccb53e1a223b60a6922a63 +size 17028 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..789b62f1e15af24ef4791b6d1e5c3e0db0916101 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8626241bc54e9f9d33ca80877566fe270c3db643c9669451cad33a3586bb4a6e +size 29742 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc8fd9c9c0e169286d3f32f6532e22833c9b2d84 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aecada78dcd024517380f36b1cc8bd5c351d59f2c451b9d8d349100f45d8a5dc +size 39791 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5fb462128442bdc29c91596844e9daf037f2902 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ee1e49f50b8fce8ab239c06d1ab8ff20cbdf440739dddc4c3186317141de06 +size 26732 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d17f9b9a2329d69b6b7f919c71705f86e2517543 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7108c891020a1b8c6eda329549020961ac0b778c313fd88df39d3b8624fdb971 +size 61160 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89e56cc7e9e339e2c970faa27b3525c9acb5122d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c4e5290e981c83f1c5ad0b06b5074cced545a3359e385596f61fb358c2bdc3 +size 40308 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c6a7f14e616b05143ca61549b13495b1f6bbad3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be729c5acd171ad845f9eda9c8a7905c74c94b9345427e12fee7aa7daa64908 +size 23702 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e78431c9f4eb1d406ad3b1bc6423f85a00d743c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1caac2847c9172fb44d334512c10e5f99e171f7339e1394238b453d20fc4fe5 +size 30995 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9065a45bb37ee980d45805f408f51c2f038bd59d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751229a86d8420a88030b0c1d97136f2da9c0888c5ab7b3183b0a401745f9924 +size 22872 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcfd2e5e346e3cbcccec40a878ab9a8e7f2409a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c478b293dfa5d674670daf8da66f90bd5774696b957cafe61942d45d972d17d6 +size 60839 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c1adaf448a89fccc5eb6f255c4c970071012ce0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6875b598069325c627aafaf72279f2e4bc0a15c0ed66178b65f00129c4f4a1e6 +size 25641 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d2d1950f564a5fa727ea63fa5a8c92c6088aa91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466d7bb511696523275c2f931fb87c657070b5a94faf11f780b754d6d715afe6 +size 25762 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33e096babdf45e503500bb42f069fcbad41471d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:141ff1864b0819549bf77657ad840cf2d3444cc3fb2b40458946f5aa0b3b2f3c +size 46404 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd1433911e1b02ad45f6fc69b817be093e56b71c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbbdca406c28b7fbeb0562400419eb5e30b9028552fb5b97a4af86418932927 +size 31403 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..999ef5ba8492fca4b613a9941487b59b5efa588e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73a7d76a50cbc8f9bef59f996c89129561dcd6f13f980722a9eaf03d7a08cd8 +size 28730 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54621c83d82d62340889abdbfa6cfabca9f28fc6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a512cea38e3e6d04246f50c1fbc28dbdb9b566f68844a8c0aff7e25848ef68 +size 74202 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e43be622d72ef56ad07deb748f5f06a6db67978 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef559c206e8ba2fd927285562cb26cdcb0dba8971c533d2f5a964906ee528f2c +size 30071 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64a56e259b9ec3805ac0dfc458e91eb11b175efa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3c3aa104ac97afe8cae7d46d4fd586dfbeea8c98936c83c64e346b84c1f90dc +size 19063 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e689742914e34c9e7d20a6614431fafcdd9aea53 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa72d7be673c1daac2c267e9122f0c24586234dcb2e3235a60dcfd13d03d2802 +size 87814 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1b236f51a5d30c85f3aaf0dc9d9431019bdde78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837c3c4b590fed261c54278ac84372bdfa0a5ff916a9435fadc3d7ac09888ddc +size 49821 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a2cf05b3fa4689fe0bb2e8996b2139fbaa97dad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:763556a16fbbd7d1434631182845729fc4f934608ea25fc43032cb09a7cae370 +size 31560 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab738c3779419205a0b00eeaa09786a03e17a608 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94a3b448d8a8645aa98afb933e75be4c963857c8bee895f30ec08b69eeba5cd +size 145192 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a176ca02b88fc474d35b904ff67796559b09cdf1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2816567e0a436c163d967e7e375d7befc311a8e75515971185f5a545962e2331 +size 44506 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c136aafc8a6ba6b6bed3d4f78e335972652b29d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e907a9923bfd9245393b7ab9cbe4a6a2201907edac80f76a1d70726bd67b79d6 +size 54329 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..121f4c12c12a96c368b274e1c70d1cbc282027e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10b6f16c65c35da0d210a9dfaf677c34d895b71e678682dc7ebaa3153ded716f +size 92523 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd7df13459b6dc9a5d58bf01c6598a66c7c3722c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946590c57db4e5f2b0e635b7b864c332f054f58010e661df0e1aa2e08c95cac4 +size 56887 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..041273845564abde52064c39fd24496317f37ca6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92a2e986412e2ef5be1b5ab36e359877ab37745cc9b3962caba75a47cec356e6 +size 58336 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc7365c3f6be401f23114f774da7d919d05836d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eec56a1f42dd7d5cbe80886baa14c0eb1d44c01fcebddd9d1d2ace01e02f699 +size 42766 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e35006b1aea6f2108a5225ea1f7431140ee2241 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d62ad28faeac44ef602e4439389ffb79021bce4dc36310828d8b299c8985e0 +size 143755 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28a341479a3f5b585586b58d6952c9748ba5d4dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a254433c7f62ad77e2fa66c910d1b0c8496373bccd105640dca57ec9df7811b0 +size 71009 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78dfc5c33f97508631f1dd08fb7d05cd0ca21e4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f83b71ade9ba12a26cc214510329a053bd601e91c6af9ad74738f72fe263b6d +size 161692 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0febe2ae0baee5c46f755bee11ebc3739c057244 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee58f5082844dafad8e6c67019cb4d7ca6db8ab3ce31a2f11b01e1069c942745 +size 209169 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24469ad682b1eb1bb9f06a0bebeda4a3e5d9d7e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfe97dd13ecf545180a235e9ea37720fedd8693484482c7cdf1adc994470653 +size 49760 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22ccdca056d6cea1f2e2efa6e6f56e9ef48ac7e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb100d9b773b787da9081825120234f9b483bd2f6a16d69ded03be5b154e1680 +size 31455 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f49d06a11cbb7c645eaf6055ca7bc913ff04dd7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc306bc8d5d70abdd42c3005199e5bb05acf3e2ace06de9fe58ce1d2111dda64 +size 35999 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab97d2eca32785485c3939fef9255835ce6f89ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:224548a15c98425ed323c377e02922f255bc20599af556e457aee7764e375e91 +size 29430 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8cfa6b941374acb0fc1cdd75bdc82331d6edccf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ac9f51c9b95066f1c7986187e713a01b6207ef38e1095f1ecf3af34d935885 +size 40297 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ae3b16217eafd8e4b81c5093f6e9bf87b873d50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b512204bba79a7c4cc3687986e251d478d51a2abba3d8ef003de79b3f04dfad6 +size 26959 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1797fa7437a1d5097576fd9d7db1e853caf7f022 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1479889e0337ca034e645cb27c4d7b6f071977ecdd7204b7c4eb617342b9ebaa +size 21671 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06a57fb19390f0470bccb27548a0260428f0cc7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2efb7c91120cb63be0e2f2a7faead00a232b361c334525a57b792db09c6159ab +size 57424 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb686ec2acf06ac369d09cd6717c652ebd3ee78e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae54eb6d7dd8169f4c65a83b2990ac43d643c76259ec173d2aa06b8e5f3933fe +size 22237 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..726d92bce15bb0d0f15d7988f1c59aa8adb023b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae9ac0856854a310cada17dd66277474d560e28789ddbeece370dd4d20f34de +size 169212 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e555d423925d76036b4862428d9434caa613923a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:198d13dc4d28a53151f4067bcb557998dbd613f316adca47ddbfa4c32cf47d86 +size 89977 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e9a20a0e76dd970181df71e3b69f0becda6a0b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e392e2b951e442e453d41ab56459edbca67b49c785d9df579f08a5ede0ac737 +size 153501 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f75e9ecb0b5e4f7d386263e82ef22f6701ed22d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c39205c843f8273dc3eaf0749f8c95e9778b6e6b50d7e2fa971bf310aff17478 +size 79237 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79051800b7980c7a878e4ca01c3910be6e96b59f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65bbed36c3de71ccc569a1cc371d79b8be70e75fc6372819379c0f7b9b486e6 +size 74232 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fd14b76be27fbcf9dbdb23a0e782f04367f7a14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8574aa9d6125e8b5c09c473406db6426380bf7ca37e07e99dc821032ee190e5a +size 82452 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..645fc9ad20a05f9ee36116f937d038c3653ca4f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8af1b75f6d0008b26911a092350b2f6b46fe97aaa8f2f7a50a384a42a9068f45 +size 89717 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b38d3dfe1b7d625955f07462103334cbdcb37c60 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21d7e86ae979a0592403506f25d392a8eeee66636d1bde254d8a5443939e2b82 +size 1002891 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a829f134903317132b511db810dcb7993fa81c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d85f39254d0b8eb3e533457c6a443bdafab67fba787a8706fb6b8c50d06f24 +size 132612 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..178a3c0a0ce938103af8bce576becb15a8c3378d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c009d7bcc5e81b5597f48800a4d1f907183c9e4d08b2df9e2b1f5641c6af3e +size 182714 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c99eb60f0fd2928befeec0328cbf1501431eb4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:577d81f9a628c20c59e828d12f3ecbbecae869f54ca2507b66a54921e4543ad6 +size 27463 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d4024f0bde3cb2638c65676e75c5489370eb7c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640ce3f22393630b80b5fc8026905c79761b66041c8d53d826f33d9d27ef4a99 +size 120161 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f14adff9205a5492624819d1a391435fab32928 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00a4dcab3698b6ba767a578e00ba1dc4d55c53988883c266dc2914277cd5bfae +size 57967 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c79851ad63ec11b2504957a8138888b34c8bd983 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9cc25a077861100924e748f298a35121126338761e823b8a9849cdf2c9b0ff +size 25413 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab27e73df85ca9dbab4fbb0f45c25565bbbc9202 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51abddf4499af594d42c7a2131c5db4e90f9c66f669d14f96a1f45cecba8e00a +size 39402 diff --git a/eval-results/mmlu/0/ckpt_102/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_102/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d065d5f51c81e0e45fdf4fbe1be05271c05380f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead0820a218ed39fd86348620eaae9d4bbc5e6c5618784553a663289f05ca920 +size 32981 diff --git a/eval-results/mmlu/0/ckpt_102/results.json.tar.gz b/eval-results/mmlu/0/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6eb26078ac5ea32cdd4475b74bfa3f9289579a95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6845000e09ff077fa5c06a6514572a548a41d8d925c8f4cb7163338d46fd0c +size 7638 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdb6eb217f39d6f84782a715edb218e27f1f2bbe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01eb5d9dd661f85785c544fa19e95bdda056b4b3a78634fb5d8b42d9cda7366 +size 17060 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71442e39e02eed3e643839a88271e133a57108af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4a61176127d58e754bd1fc0ba296eab910299bd56b516cc39cb522fdd7e35f +size 29788 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de2656501ca238dbaa9f5444a9eb47cb7be0e271 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c2b3f32fc84bdd15ad7e0dec063cf36eb73d67948daf6868a14f2fefa488a1 +size 39818 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..236c03bb63b83d7e13a4db28dbcd0c121a5768fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4502a64ffc0ae47965b0904ec4636e26e3cf0b7a272017869b68ccad77ecff62 +size 26718 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa0bb2adaa078715c844741d6e0b69869d3303fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e42029a8796b7d4a02ff0462553f17ccc5ff508282fc83b5a7743b3e55153fd +size 61198 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5229c6a86e771765c58ea12d39235d225daf9b75 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eef1984bd17a8284fcbbae3208e2cc1c006f1bc388966b7aa3e39394a167cb5 +size 40376 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d5055f80ee42ebab4a144aecc4105929736c99c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e1a6a47ff1eb19adf10bd6c46b264ab1b49de438e1fcb6fec202f349fd8b9c +size 23753 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9acecde44d3f481652836bbf947f04529f57aab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba57386bdfabd4c0ccee380f5df76a6ed0ca0985eaf86165539845f0dfb9b913 +size 31111 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f0361ee41c40cbca8db3e2605fc4fc797cd0011 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6539f3ea7537ab6cd36ad1c7c32f7d26583ae3e8e8dfdc10f2f642940ddcb6 +size 22934 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86aa18d5dfec2a67bca0bfc020fa669eb5199626 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0721c8967c0fe2a7044c955879926ec0dc9171f6ca622e8e95cc8ff0093a368 +size 60927 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c01ea8f695814921ed67828fbaee00107cc49e83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39926af35611a745ef30ac71827e3750b7754b930665af05e0fa0944e610d5ab +size 25691 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97abd524c028005869d7257a58098cfe81b920f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0696566d5171e11f88f6bd13ddc5eeefbe93b218c4ef6e536e59df8080eb8fbd +size 25763 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74b08c45c6de4f095b32342e6d1a4456c6b7a784 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5598880b4b83c2af72b0d67677bdbcd5ed4cf16832d5d8ab340faa4e4821c8a +size 46448 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ec3fe6ba1984563595a538ad84233d04afdd474 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc516d2b72bff451b316d6975d3bf489e17e365cb89e01cbf7ce9e5977cca35b +size 31440 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..678e7af0e34a89b3d13083aeb18297f2785113da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab80e400e3f3d4bbcc025752dbb3ec1e1ba058ccd24d07e285ae155211b589f5 +size 28780 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a41c82cf59f9bf095a8492e3983d33e8b382b260 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f88d7b52ab2a713df152f900b91e45da602b9cd69a58ff3111011560e1053804 +size 74465 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..912166316777bcaf7e8fe1ebba779eea411e5050 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06cda3ecff8a3acb74f5a9007768d3d8779e290a239bdb5451269caf046119a +size 30081 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac6369f8a7237abdec715f6526245b18067a901e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5cd285163ee370211e5691190413863bf37683b0953ece057b9f8d1830cfb6 +size 19061 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efcd18ac3b24fc76deb5671f2536b8b5c9c0add9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81eb3bc3466fc63f79ff1c56ea28da369637c6a00bcf46ea0a5575cd680ea144 +size 87928 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..284ea6a400894818fe7c24934a8dacbdfd8385a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7b0bd01064596fd504d58a2001cfa813df52ad4043a8d77e85c145ad4c0c81 +size 49935 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fed5f92211579d07ce018b5837bb0426174fa91e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a283e4d24aaa755da72218833f170971d20bc5b0684c3de684605249175ea1 +size 31572 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c19dc2a700c782ad19c9489b1afc9ae431265eca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7b2372fec99dcc40a3cea7bc3bb021bf5f910274da8ae6df5af8d889e3bc08 +size 145480 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6bf47b867624443def128956bbcb517b63af66ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f66e208537057c4c711524b4332de02862a5eddef21306cd43ab5f5c11b0374 +size 44533 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59a4c774309ce9584c6ee1d3f03557f50278ddd0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5002727c3b17cf544b96ce6bcc4381ab00acdb626ff624d423562ae0b11eecbc +size 54392 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9d07f21854ee72040275e606354b8f2cf3fe1f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5d027e48e97309e7ecead6804d439488f6ae0f26fdf1fc3cd968095d753530 +size 92737 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c11e1e8ac75f8e25a7f43ae7d2840f91bf0b3677 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c99c8ca71a4c5720bacfa20694d08a591d1c7dd17f1520743ef83d7f426f7f2 +size 57072 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5865e59647058a425195f02dfc793efbf555f051 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c3d34627b78af0fd811585e804ccfdca027ba26be3d3dd73ec49bf03b8c54b3 +size 58417 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d5d986c0b1f50ddae710b6efb6ada696517d9b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e906a8101d80cf765c75e3be152d12ff3abe123cdb7ca76acceae96e11cd6caa +size 42841 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f794ab112bedbd3d23dc88a4d11cbe3052e6c62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fda4be04cd4871e64021e563098a2962403e404d645f5f7f08fcee6f3fc285cd +size 143870 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..177f1446a3e8f36f7fd194535262c60fc18bdb09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d4fb2e58c8491edf5abcc1947888fc0992f3cc3008a578214f3a3ca9a972a9 +size 71115 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3163e7aa77a35af7e1ebf72c2bc7cc7a278bccea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70d3f2c0d90608f5f5b621ca2e777b981a2a9c6e5ccda8c52be94932786ca450 +size 161999 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12874658d852c00eb1e576e185705f966c4e3441 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d11b0d4d2cf11f232f70b422366f51f8c93fc9a18ce91b1f08d7a2dfb1d9631 +size 209493 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79c91180cdd889d5efd38e8b6775ae3c7ee9716c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3417bc7b6f4471f799185546398f2b033abc8490d39aaae1fdaa83dc9df8cd7e +size 49822 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbede0f218849de869851197da1b76d07d2b2dda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83de797b724eaf8346d43b6fa1b1a6732278d83db689822283cdd7df6878e444 +size 31497 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcfdbb60b6c070385f29ffd90d93594ed3d41fa9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09eaf563d3d018e04dbaa2c0cf345d857b96e8e5dfc440443bb3ce83bbdad5d0 +size 36072 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81a3a9c89f1ada92cb86c0c7063faf531fa82bb4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd9b0067f9a21344df9176d8a1fdfbdad39caac983228b963348e3507a98324b +size 29475 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38c7da7e573c7306d653c4469a41074127cf00f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c7fb9564ccafd3fae71d79f1f0bf244890d2ea99b42b8921e277869c5796bea +size 40326 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a91f81d0bcb9c9370f5d13fb55e1efb0f69bf9d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3665b23cf60e2bef06709a0739c14c5425786ccd1a387d2938052fd1f73b8525 +size 27025 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a82d2c024b15b05ecc3d3b89baaae43523468f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e76e104a85f5c5f6f1d48c7579c597d9d11d1592561edee6dc49776ced7ca8 +size 21708 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3f13b83be5b8663603114924456d158bb179cb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aeb0988ba41ad6dc822017a5f6e4921f021656c85a709f0fb198273de67e058 +size 57469 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6faa1614ec0fe517dd32c2c529d636ed74ca374d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c1e27c07af64cfe59fe52a71691a86b2c3cbd7493f7f0ad8b4461a18d54654d +size 22248 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05fff686789c6295811ff6528180ec75f014c33e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ce869da259ba64b90b7c2f9e70ca15989d5c9bd8d9911cbb12ae313bbbffc4 +size 169201 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6dfd4a0ab4faa431d65f6e0d5b0972c28515f143 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33de116321bc79dad639806d831318faf9429418c0a7f5b4b380b5d04124d37e +size 90108 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b600cba14c098a216347ae9cb945d52fda40f1b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbc71e6c3a7903802efb8dac94eb8b4d81a717b6c780b1410bcadd12c28080d1 +size 153963 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8af5d561cceddfdb4e615bb0dfc2d66bf73fed39 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11aa165094cb1e0d13e18c855ea8be5ab2d255438b5d102484d7d1cf22bc6111 +size 79371 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56cd76a646d986b3107a9df7c233fe078166be2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5a18b1bcb6580675aafbe4696c8fc6926dd780bf6b60272e5d7d08b5fd4727 +size 74269 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38b70fd0e352beef124f6c6a3f433da81de8f605 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf923ec9c7f45c3b2453fb83b69871337f595cda6ed8778e7f17293175631d3 +size 82584 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8574dcab2cb796ca3fbbe0b95c7f760663878cf7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be4f97d2be0ba395b4f7c936a054a8f64104cc6f4da70a3bcc03ce99ec8cc6df +size 89933 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6221232bb661012b84a12652b3c109c3a7a81ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b705c0a83dee26e3d4caf0c6ad34b428a850817310a07a56d6781e49923f0dac +size 1004533 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddd7d53a3ccdfd45baa9e4dcde9b4ee3fd143c8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714f103a2245c152218bde5cfd97455280591ba593ad79615e5c4f61b222467e +size 132948 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a940ed7a333262a75dc3b559f3b2c00747b0074 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734d0a616713be3bb940dad3d367de7c266e481767fb61bb8f8b42b5f955b077 +size 182991 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a33400b45e29c3dd2ce37fab89d3310584381b1b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883f1c19f107b1cf967ffa829303b9213f8b89580c84eb742b89a6888bf2b11f +size 27523 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4da87e302c2e9b8680bf048675d12780088ca53 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f7c23da4dc1f027b093c17a580807317383003668fc42b7143f886651c7f2cd +size 120317 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44784ad9cb1a2f6345ddad84e6ad0620f24b732d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1b7b161d05c7b82f85d47a6189685d02b01575ec67c42ea97b89dd404ca7c1 +size 58005 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b04a2f2eac2ed597a37f930c423fc2c0eb39e14a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:551a5d8f9b4a61e4527fbe8c2a60b1effd06caf6a27ef3d3afec1d3c811e39c7 +size 25423 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6147ce04611c8be13c768c4d13d2613fb4b86c97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d058167c75ac66e54506878e8e7205a7a09c3143227f6a65135825c30984a2cc +size 39432 diff --git a/eval-results/mmlu/0/ckpt_105/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_105/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4fbdab50a70c38b05e71622ec09602386d09580 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee4a31efd397210a300b61e355c9d358972c5854aa0a397c76b307a10a2d4d91 +size 32979 diff --git a/eval-results/mmlu/0/ckpt_105/results.json.tar.gz b/eval-results/mmlu/0/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e608f2164ba2941cd6ebc3d7d6acb83b7e662e30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f50f714f4f644b62c4d0a703a842d84f720b39382b71b45d7e84b8da0f1877 +size 7589 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a573ee19acc1d195b7f850c1aca3dd52e591e67 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0b51821df19c6a8ae84ffe103dac5d9c374f182bdc7f5f9fd60c56be8ab158 +size 17011 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b831511d688847b0e69152a38fc2bed6c3d025a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da679c46d14888b98658bb64ee0d078cb2fbb113d5ba0ea6a8b1b8cc5c2e99a3 +size 29780 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..637f542d21c228e376e1095e23b754d11e94a584 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f168072c6b23f709ec19f1f5a4ed7ae9a19292ab7d7b6621525c3bde37e127 +size 39762 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11de9dc847c1beb53f8fe32ebacd17ccc0b5f3a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a38aa8c94ae1c229ef6ba8f1c98328703a3054e6de10b8f441bbc7912b5358 +size 26739 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad76bb89839321579240e719786a06eac1e75cd1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d243f6e584e0e7d372183a48112e5816bc018a7f4e8e500a49f9b3654a79070 +size 61115 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16dfcfbc6010967f9c8f9221d7b73a7ace08cb5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1f78de36442ad8c0da8444901756a8e0823838f5cdb51981a3f52b1e148efd8 +size 40344 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff7958b8f7c143c50e7a6509581cec7063ab31fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8972e71efca32d31f102fb321de3ed345b2d9645d6dc5a9cc0069d0d6bf1f615 +size 23730 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a9770eb69754d896eacb96790d44e8602930295 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42c033a45df28026d076d23318387b295f9d2b4733d33971bf6e9b66531e5de9 +size 31064 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc0e8823b7046ec58efc407e1c7219baaad90996 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63417900d8c4b58c838cc6cc6fdee7d6a798867577342f8d99ae3d1ebedb5f3f +size 22880 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc800b00d6a926e97ef24fa1acc331bd5c42871c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa7fbcde973ce881f093e19e809f03e394e72cacd6285023794559d042108a5 +size 60828 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e338c965d65308a94bfca27bdc5d0de40ceff0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65581f85e848d3a4f02ed8705761c0557f84d91c27186708ac76cfaafaa3d8e1 +size 25649 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad8f2a3ead99acc51d57786a452d345bc15672ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb0920ef40e9871f8bb3f7e39e32b4c2e8bed72dd53fd3c8c22775ac1ded860 +size 25735 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afd9ab224bf84c15e5b4152c125088a73d2c8df3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f4174937f6e8464e5fc92e53629d4b75cccd0d2baf3d9d8f248456fcf51321f +size 46455 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fa9e47d94ae1851b928011b33ee337e6fe09844 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c07918df902fee443b3653d0551e830607d43248bbb39fa9ff764f61bc250398 +size 31414 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..340e8db7ac755d33d3ee4b7f8388f15533126cfb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a18488cbce146c88528d38413647d77f89773392ca9cfe5817522fdb7c6c29 +size 28709 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55c4a4a672d5589790db7a8287a632a2e7a09e53 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be0950324efaf73a8e03c59eef76236f91600f336842598ebdbcda0388d75c37 +size 74427 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9abe6b8e53d4dbe61ce9964ce8e2a9fc6221f8e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a4787d662d4b19ee8a9c82ccde1345025d836fd398ee42ff2d0771094aba38b +size 29990 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6f4e6a70386232a7f918cbe5003fca43a2213e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb63bc271e7d073811d72d3c84741a490560ef84b6617f2a3d57525592ac3808 +size 19056 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..975910805156b92d83fcb79c4b2154452b742cac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c20989baa82e2cfa04a4a8e4a03690d587a3d3cc5c855eef625a0b36e49b3585 +size 87860 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e8bbbccc326381fdc64c49ad209a95d6c71432c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7de928b9d6dc26e81efdbcdeb755eb08eee2ed30183cf46b067ef9715ecd664 +size 49907 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46b2b1cf87660e90822ffe3f23078081e053f92a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021d38b11260202c8c014a25f3bb67cfe4e8bf2fe10e532137db24377b284706 +size 31542 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5afd6a97f3d50e3ac2aa2773c4082c0aef9944c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ea34bfc1fd8837c11086bb581497163cb0fc020fc807b3ef25b61f8550f67a +size 145235 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0736f8ed93cc86d6929da810150d2b327bf21b61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3715603e8fc11b2f4031a98026d0cc111257a58ad4c5707a4422f02ffb1031 +size 44573 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a11dde8ab8cde83d207bef047b5aa94e76c3334 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3f276e99fd148115f616e195f3069b5d3bb7bbe6593846b2e6f1e3fa59aded +size 54411 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf86c5c4773a354c491b3d4e53ecbcbf20802dd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2fa7e93fbb139635352b14403ef30130b3969083d5294d65c159c74f5cf1897 +size 92545 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a7bad23187dcee182e24873ff31d1c1946d7f9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e71705296e624d6a2a73dcc4a29d2eaab7bcb21bc494039f13ea68ef50a7051 +size 56944 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0954068c331d9a95cf2e574cc9466bf2357f2e0e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de496f3c5795988e0396a9f5b73354a2f1ba6e1b086cd06e1c103b5ff98243db +size 58370 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a529620a37d7b16b1d8cbd454e8f11a7670a7915 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb66e07cf613a44068a6a7cd2b70a307dc821c492d85945d090fdb2cbfe367a +size 42761 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b0bd6bc72f9fd567647901ed1c043b38d9df920 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f7e99d02e8b516a8f448b86a8503440abf6c47fa35d0f6b8a628c3e1375cb0 +size 143756 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b363566ac2ad6a178474c8ea81bdd601f506140e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36925ae5ae59d28afa4631e153776d1b6d8bcc25cb302019ccf8538882ac660f +size 70986 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f426c8f587dfc941c757e856720ecee2ebe3814 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54aa8cf4b614da955a65ac94849d64a53cd4009ba92a88328e647ed907b1e1e6 +size 161815 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8efedffda327e8934b4e048e80afd107c1ddbc8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77baf7f7ef0cb2bf77988393f5a593ccbe95763ac1b41aa158892c25605b0872 +size 209357 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..794d149c154ba21b46a6b4618bc1e62663369fee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db55659af9d6a72d8c0e991965706592742b182ed1b5386feb8629c84550860 +size 49823 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..611e91a800ced80081a0d27bc0177a12bb48dbf2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52182607408b5805c2b2ef9eef6bbcd86ee6b0d50f89c53bf6b290210ae8a46 +size 31461 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea7d45703a98cf87e6e084dafc2f2dfb6759098e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907e6d2ce87a807247ee9c9e2798334a9c12984417a9e968d9994a406b2f32eb +size 36019 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bef4ace7f7f898ed839c8dbc98769d3442508e93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ac22e3e3093a9681363c4270d3f62386175a299f76e2eec5587b5c42fbfd202 +size 29411 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cde6b3bc2425a54db466c2905fb55270b6f7f138 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1edf55e9939266dd60d080814685c16cf1ffbc81bd2d1a56be3d39e88b45d5b8 +size 40282 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fff8bd9d2c56b79b75c61d24c4e6662853e9c6da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cc0ffff98551ef708d93942789daecee3dd9721a3b22532ca02196d31824c0 +size 26995 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbd01dfca41bea2582c882b0802e2c4468a7fd47 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4aed518cc949ce986f7f3038b4f03266f609de0faebb2905900a9c8dd558fd1 +size 21708 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53b09586ba1b7abdf508ceb2e394bf5d4720e0df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9f180186c45f7a9d8f2606129e8470a365e10c12d1aed4214abc1ffd0c4d65 +size 57425 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f35a60c41473ae6f394de27a88a37c427f837134 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b2763dd7e3cac7fc2a9b198d0e819f4abdb7c5ce0080582f78a41164148feeb +size 22253 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..608aee2a59ee63449c0a5bc24785273595c78728 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0406aaee98ee578e2e4bf79e10ca48da954bcab167c115c9a725ae001c42cbe0 +size 169236 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd79750f55d1fbc671846ce90722aba75e7e8e51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d74ff9ee6d84f67b741d4220dcbf1ffcf1207e14d80b77aa17e545f3e8d9ba9 +size 90024 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f38775925b66158d34120c29c4a5c2493451d7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f315722628e39a74b6993f915546c5360da8271c0a2b2058647300850ffc9f +size 153732 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a07574cb03e84a3aa59043e99514c346599a1fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88658d55bac24c8f847393c11ed9bc770eb5f48a97c9d1c42ef6f89d1ae5bace +size 79248 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3d051eebcb2e0f7ff2b03cbbeba7a5d625c642a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09be23d4658730ceb7e3ca40ca5c047bea6e667e71d253c7b83712cfc2d15bf8 +size 74203 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea6d4991cb428a7f54a302c77aec4ffb80c68b32 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7da885d62aa6c760674e0a36bcaf06fbb7d1336a465deb50be6547dbebcf5f +size 82440 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cff21bd0793f3403940ced4ec18bfd49c1c80cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c7fcf66e062cacb94572833133b650744c4f643812ccfd6d27faa43feb28f15 +size 89784 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a52574148a1b3ccad6ffd729bd1c429d46243901 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0443b0bb31bc04d7e03a9727f96f143bcf370860ebd10b283d549d7ac3dab92d +size 1003476 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02af78335492b817160b4892acd7a3d5c49912d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19666b9a340c5d407791aa3ff1ada6ad560d6119b20ad1d94592b85aa83c6daf +size 132799 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..718fa9ac34b58652148724033e5532d5488330a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10455da55d40045652a00c54d947f2572f3aa7ed62c2a90dfa8dc5ac262b12c0 +size 182678 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..412d09e34902f9264c9a9e70da3f3035dd431463 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7cd54e2c95c0a7ee4f750561712fce6987599dcd5952668a0bd15664f0494f +size 27508 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d423b84dce57b00d339a1d102508f423ac352e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:866f691813aeb5f6f9fbc252f99311bab75dca5d6a86a8ca2ba4cd8c31afcae7 +size 120136 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9daacb5275c4e2a6a5c80a5048b99c9a29e67f5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ca35fadcbb97f65a9b741803bf6f2fa98d59d729de6083ac0126ca5de86bcc +size 57970 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d56a35febc890f53b5b9b2dc37e2203d347ebf73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d27ce4d0a1fbab83ebaf26e5de87948f4793c434d1e8a6bde6eb715afa61c74 +size 25372 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88c47e5c43c4c915706737c12b2dd007c951259a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08fc012d70f6c1aab3378f9f1eef1b4d01bb9f591b59b9fd20aa4ee3a6a2d5a9 +size 39405 diff --git a/eval-results/mmlu/0/ckpt_108/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_108/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6dbb5ffbb5e113b8e1319651a0ec96532089e75 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff0a03760b4d844681d373e7c8d681ebdad3ae43bbcb4e64f7484d9443b235d +size 32971 diff --git a/eval-results/mmlu/0/ckpt_108/results.json.tar.gz b/eval-results/mmlu/0/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e8b7ea56aa7e48e4962acc94c4154bbafaf370e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8775caf96aea4f7a4a9ac5845b83a40bcc0994185683691b6a8880092283838 +size 7598 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1d5455f403a2bf3101f82c8972796cbea63cbf4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:810156af629b311fa6e1bb979bb7686552f8598ff919fcd3f8738c207dfa8d30 +size 17051 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3b43815197dcd644503142661eb1ee4835961b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4775831e01e0b6fdb3d2cb9dfa69ca2da4b12cdd7ef390aa1a15f1e9b4a67ff +size 29768 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f957971cc9d12db800427a7c737407dfe97b7e42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d43a822473f0d59363467243a01d524d0968f9fcad8e0551aa96bdef2661bb7 +size 39814 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..704c97215f403459a6ed12a9a1eb2db8b7a0baee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e07b1acecad381b715dca1af89146fe810bacecaca57b913b85a7763b38cb7 +size 26705 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55bee196f0d77a79b822d1ac79af54b43a23b697 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e02627d3ac52d71dcd3c4c2ff0db2b8746c0c5d4e30ff4325282d5c98de99a4 +size 61173 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51b7232e9cf86d6fccbdbb397b8b442ba6949b3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ae908fefb46b8c60c791da5b629957303e01112278ce21bbb74f925c010845 +size 40371 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40f3c72d0a68910f7a62a31000b0118f95186816 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0906754cf72bf4b3c2b9a61a6c9319c5759197d41ff8d08b1bee45b616ddad5e +size 23728 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d52af6ed86af2cb34e15ba02c426243498398828 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddbfffb4eade7fde1d881a41cff61aa40dd4678b08ac63ab92837140ff844e05 +size 31074 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2815531dce2ad4fad1c538eb160f322d22c5817 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd5a674de10926a143ffce5e192aa4c2b0294da5300a98dbbe36dcec6088e78 +size 22909 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2031df610b6f1b3d20ef3743a682d6f01f766612 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a147c5e4f3d79150e2b8a3cdbaf8c0041dd44e9ef2ec876925f9de341fe46feb +size 60865 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db7c8d35d59bd38b6fc946292b9a8c6c75374962 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:895ba01e4cb5de788e6d7e0915b9bb064167f0e3d992b00a2a831cc7e1877c3e +size 25689 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa8d6f1836cfeab4347bfbbd2a43a6be8038ef69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebee7f01cc13f1e760c516e0c4a11145f09798fdddc6ef6d75a6fbf556940d7a +size 25770 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e065a51da678c4ae2ac1270ef6087fd40cb7958f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3a731da92de56445541eecde38db0322c3adef4738cf99c9c803dc458fd7f5 +size 46425 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..711d7ad232623d5212d2ee03b39ce44478b7e9bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a6505d6f6913cff914a4e4eb3c2eb0929d57b21aaa89be9af6eee5d1e41061 +size 31445 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e270cb8251a1a99a9a6a074aac61efd9ac6d38b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1751045389f0b40c53b153d16b541b73d5515067032c50fd70865bd742cb06be +size 28732 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34820fc4a6647d4c93148c4b07f4a597e880b800 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244af31be6394de00dda1c53f54f9a72db79f61c1511d8f32cccc64af3e10286 +size 74404 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acb618a5613ee201cb350f2b004d563d5dab54c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ecea2340ea073fdefd5195c24a5140a0e8b8bd6f16a538212bbffec95e7a79 +size 30066 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a6c565fe7555b7a4e68ac19beb1a81687acdac6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9af85aa221cf1ce08ce1889049f5f362fdb2a2c2e5df05b65a8a4c3e23fc393f +size 19062 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..436204c1399e78b4855d07d1e5e1ad3f72b7190e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4000268cea756d63d0b366a52fa6cfec3200a47648ae88e0f9fcae9787ca938 +size 87915 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e465ef9597e25bbf645610f1fc6115d2a2ca42d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1f9d18097574a0fdc96bf58515ffef315405cf0f1880a1e671bdd6cbc7d82f +size 49877 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4dd1f6500aa696bcd2b5c50a9b570f215637e3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3445c5d1b6252924813ceae1b5c95ebcfa061328e2d2752f18ec4eae9473daa6 +size 31561 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2b8a32798abd710828893048fc6df9f2cb4974f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eaa959a57cfdc2c97748dfa543934c48b52fb3e34e034c41b025b65316988a6 +size 145400 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97a7380e6b4779354bef69cbbdc45adcf579a4b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:083d1a21169455f052dac2afdf8176639f23fbc0179953916f0120ec9f59f055 +size 44581 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a6e24679632b119d83b95a85570ac4f908483ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6adad9629d81a2db73fff16fb14a48de1dfd11513d8647625463acad95149f8e +size 54399 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..288f5e8dff6ab430145bf8a1320c5db6dc6c68b2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770564bf8d1c567d86e4bc303645f83f49040473725bab25ecd64ab961f6c536 +size 92612 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1530329c5b29db22c973aeed3499918061208692 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:330ea542ca6adbbb24f50ec7ed3471f31f0b8fc25155c7b53d966b9f5f7a3d49 +size 57015 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44b3f208c3e74dd46856b3957cd3acd6fbe66d2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7479eb57c167de657b3cea76d86006a846866c6976f797799cb0eb32037aad +size 58381 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ad0f12f1ffad9e9e920c6814876c4050be71126 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1125a9336a7af297323775a91d8b088214530b20557b4ca008489c087d16439 +size 42800 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c75b2f377f359ed2766fea4b383e0e4ed1fc2a2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4e3ff2d3070abbf593965e4b147b4fe2977faad3decb42ca7a0db02b3dc33d +size 143860 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06b8692ae528942f378bb4e8e1bc96d26c30c3a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9df6ffae3ebbe51337cc03f3c8c43355619b6f441b0b11242d23848dbc03d69 +size 71014 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f352657283a3453af17791cfa9dc780e994a961 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918b2eb045a9e4c309c4b6ab682ae33d2a722522f48fc5e86d4aeb14f61644ff +size 161881 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6af8c7844aabcee3ad3a70caa0d57f9cd9cd5e2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bc1c5c04239981e60b4d93c67e32b48f94b3651ad3d1446b147f2761ddea0a9 +size 209377 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a965485073766f1faab5dbe2596b0a9910fd3c1c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1afcb8c140f67a5ddd5a6eb29fbdd5f2b0a623bd77c5d07c445402e9a51404 +size 49847 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..687de414818dd89104706efd18f21b95ed90ec51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db91f6800507190a1dab0aa9eafa350097799cb5fcb14f80637e0ccc4035178 +size 31478 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50ecb1380c12384a56f5b20611274f96608f381e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c636d9ed374ff0aa544dc308032f3207282411ef1d56df97e04e45602ade98ff +size 36028 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44ae53a3dca09cdfd72ea3c507d9b4489f4ef577 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ed3bfc51f375b5c85b7eacb1c739db356f1d028ebd3bd4d41b173bb284c378 +size 29467 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcd6b235dc1c7da47133e0744643bbe97000ddd5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2967c06c71ddc52782aa2e0d82f759ce89fc826c5c095ff6c2706af5cb2e4f4b +size 40337 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae72bd7328b04d63c7b5a0b0dfad9e09374b6cb4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51286c39dcb861a65d91a445930147ba9ec2c3d80eb4006d29d641b5c9d31489 +size 26939 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2796179da43297e4c3bed2f8d74586f558f6ea0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3e465be11baa931410daac5f682a94458977ecff5892c0a0e7422dc4efc244 +size 21704 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d0d3b7ef2fd3b02de7ab8223a5afbe4851f10e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93cc3ed71ec1992b12ed4ad622185e559c120a06325c897465603b6ac0927f8 +size 57407 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d8dc800f0ab9ed38f20a128828405a5b533c92e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194bdcef94437e85cc316a6bfd97767f42ba00ac8a0f485c1cc0b629fe582124 +size 22247 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24aa81a488edba2916e47e2a72d5ddf83a99f20d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e2ebbf0d61d20b1ae436622e129810c52df5625c9a0e3b69a9a080abd1e1d6 +size 169159 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..097b23ea1be69afb1f8a0909b1a17e7e1c150d0d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:765155a050d0b67ce99fbc1fb52834e41fa2b531c733afc540ffe08f8337f471 +size 90127 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f730ac5ac6c9b36cb95d8719dc1b65a862f59f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33888fb90ed4cf79800d9ecd18a82f1d29c5e997f6fbdcb96d16511f60a5555e +size 153805 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f279e51a5e880bacd5d5e91ba5f9df541c15d97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3c3f2b9f6da55623bdd42f53ce29ac0e6b990403d1dcb7567f36a69cde9efe3 +size 79290 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f444ba361bff7c919fb5ee3f01096794a6d35831 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32e5703e40117b863c25aa4fd6d573e7c778bee55da9df09374b65828c008519 +size 74226 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2bcebf41d42f7d7e414b5312b4e6cc5aa404fa0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb483cc9959a079d2d98766e4767b347c6a7a1826ad99544a277ae9c0ad5a0f4 +size 82523 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..623c47036d43abcb4f5d3b4c2f9d6991f0802b87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b01d6beb416ff3fa1b48f9186173b6e31f91c056d3870a342a73970cac7ff627 +size 89801 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcf40e140b1a4796f680521d4a453cb56f1cccdd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7bf6f996ce1e20d5c9e0285225374686b4d61be98a6162a406f824174966b1 +size 1003293 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2a40487aab10916ed81aeb5dc405054e2f89f88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cced06bcae5c329dbe3ce8b97fe1f06e0c863b6e6a129d39c274ddebe14695a4 +size 132801 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e22148beb2f91447026e13b58794369580e9ee9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60026193033df4bb4772a038a8230f7f5065b8061eefe5c2ea3d45f2367dd7c3 +size 182791 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4c8a8e07fb66ca9e7a5d13e7009eb219c77e9c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0215004566d1c90d85ff0f07082bdd03dc9a655e0d0ec6f1948a17b998c421df +size 27513 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75e2bd147935fed3462389d6f66079f61a66354e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41984217a966251665c5b36ed32c608b64cd2b290990e4bdc220d4313d17d9e8 +size 120247 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7244481ea083e19273f60a7fdabc59aee461ace6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75936d5489e41a9a67cb8753feee64a89a1487aa897ea73fbf6baf457d0f1949 +size 57958 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2415fea3efc39c88594e2330f4e95fd66b783668 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33d8e2caf11bb46bd0aab4db1eb3b6b19b6d31fcfdd5ba4c75cd1a5337c8241b +size 25397 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcd7d3c70fde8028a06cadef04619cf099ddbcd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7aac29a4163330168573ceee3a358a6ea2a2393d6177c96cb147b6a65afe822 +size 39433 diff --git a/eval-results/mmlu/0/ckpt_111/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_111/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..653dee5ed841d2d4b72b7767cd75ef1d4c7584f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e280ed4c7c0d27cab904bc3e0e50306f27544bdc71cbef546b3795843a61a299 +size 32945 diff --git a/eval-results/mmlu/0/ckpt_111/results.json.tar.gz b/eval-results/mmlu/0/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c15dd77989a0e8c02022623fbd6abe510a15dff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9214bf241ee7fa5659c3758f3993c4c36b0174f174f0579552e334c72bd28d +size 7615 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a344897eaafbe0548471a711130882cabfe82048 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b22f28fc23f7cdb1d2ee99130d452e8a4172c2200933040ce00f0e08f8d103 +size 16997 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9df36dfe4775d83f8da4d9b588b68759e917e1c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3af4bf9992d6e357f5ca14cdc184f0b9224217467c6ee7633ef19cfee3caa72 +size 29745 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcbc24ad1d0c4230791fba73f40a9d7cdd98988a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584e95edc4377573455c056a237182a9781cc7535cb31d3355d3f45f6c6ee14a +size 39751 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9f2d2daeb76edbfc5bff00aa95d557d2507b988 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3403e01525f821b38346448a82d83af74523ef128eb8c9c1168303d27a4a0c +size 26697 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad975c3cf9a1eee965edb0e556e41ecac2ca669b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba793cd5cad19a20819708cc5e5beb3afd51e632acea61e3c8e1d91a1bc5df11 +size 61129 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d01dc1d85346fceae5b5151b4c37c760fb8607ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a81e1b4a5f6f6eae79f45c7212f3a8965b4dc895e7002004ce6a2a24780577e +size 40310 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63cf3e5a2aebe85b01cc44da16c81cb28edd9887 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0be7be77c5660eebf94d6b6468910637ad8f239ca179deef4b6ac02cfc65fc +size 23704 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6347cf13ead2b68320e35faea004fd44ed3c9b91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5e50a6b9f2bb6d5540e99bf688d2ff8a7225e3f71229e361ce87df0093d40b +size 31026 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9b9690addf1df9c1fc7928f1e3b9b6aef77ec46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1cd9fae17eb6784482e24fc3c252714c0f628b4b78a4045396cb78e1a108d7 +size 22869 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fef1a7947e574278eb8e23cef75875b6e6aeaa3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5951b6a489faa3f574d2a64e0b55f9e32b03639eda33996bd5e6d23a1c38bd05 +size 60842 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e25bbf56f6187ea22c2138dc0ffa41860109fefa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafd62a490b166f91e36a6321ed7b2965a139f4aef7806c0bad95984ad064a65 +size 25668 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08f37fa1d74cc2fadbf48ec6186d4858afc967d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e699c7ce7c0cba5ea9bda445d964e7f1679e5224a0c585029d51c810419628 +size 25758 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e46c940a7ddf5cceaa4b9c93d64b461b806239ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd97d16fa8fea8fc06c9eff85a4505de4aeafd84b7fcdacabd3a0ed41c8f7ce +size 46410 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f20c753b2b0cbaf1039dc4572c41046f5cf53d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72ab9d9d480899cdc03f02c0f549efe536d13d3b6147b668369089df25f4731 +size 31433 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81eead4b1d75a5bf60c25543bbe726311098d85e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4742f3a910188771d8b45574935060ee0dcf6d294e114b49ecb70abcbe3150c +size 28715 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee78e877e0ba57b50a7401155edd169b444c79dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af5c2a0f096177c79b659510360bd0ab77f5f2e1a899fdb49093b287e09c6cf +size 74359 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..930c8092b88aa7bbca745e3138bff9f7a747e7c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e80da6bb46bccbf30dcacfed261fa07ddc9c8b8b05806e93f5df7153dcba33b +size 30039 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfc03f6e599ac245392da3cc1092620d784e92ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9b31e48125ca0520f82293cca82b91e193bf2b35d40543b16b3ceb29ab1a3e6 +size 19048 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7b7e278eede535ab871f6d45d0682f5f37c8013 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85bd37895d9a89da0103ccf495d16cc6fbbdb617a5e0d52d9e92b433d33cd8d +size 87851 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b29a98be0ae249a0a57efa7fdcbf0aa0c31acb1d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13855b5f919832a79392f0376f70a69c0775f13182a64ecac02e7a08076afbb9 +size 49815 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42ad58362e1b1e28b9fe5c8dae6ebe6f6f474ca0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98861217c834ace0b6ea8f0ed48ccbaf01fdaf1dc33f32c9daa791057a618c6 +size 31498 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdc92c994665f4c3e2915c7a6cfe88d56528e74c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da1281d150429e8894e4112ef4ba6927ec0f1f31b442c28451f6b9943bd8219 +size 145323 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01f3a10a00f99c36738621fe41bedcb159076c07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b31a4f55cb6292f6d4a66008e7cc9147385877edd8c290c9d6b992768986f6 +size 44514 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcd82e6142ecaec3e662acfa8931c35303915e13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8567ff843adcb06120f63223eb2eafe2958e609d78df049930ce6991d6c9f726 +size 54367 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c462c8396dc4da18445a4b38ee449a4d77d0f487 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3167d00caa4d1454e0d74cec311724bc41bf538f2b2621079f165d8208dfa2c +size 92484 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76ea5bdb1dc7409381be268ab3813ab7da58cd62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2a95d3f69c295dd5fdb34b66dfc7943870832ad8f4a69316670e9e68dbdb8c +size 57034 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97c9159edcd906aac4f70423d78b07ce2bf2d419 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4498c97d2d2eb3f142710a0a65fa678221ab88507084cd07a4bb34c64a1e4294 +size 58328 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1e751928e9130e43d64f4efe9ccf6e11fa8db69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4059903db1c82b51a5ab3559b1cdb32ce05a716c1ce093feac685197c636daea +size 42794 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec0e18ba014615d7b0b461630e9e2df9db0d2813 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b90d909a386d399675d227d8bbdb588cdf3804729f105c163782581e4e9c4d5 +size 143727 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51df160a4363977895e926af3b0adb1e47a70c7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec898349d3bdcedcfff5d862464ebdf0fb8d68ad5f1da9df08a82a279b6ee480 +size 70969 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac08cb3084e6f24aa54e16d700d3abc679ebaf68 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd50ec9065b7749ed79b8c0dba9d7f3dcebca67fddd1a011ecc77d6e86155bb +size 161808 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebe915b9aa5a6cfdaebc3ab422dc20bcb0d80803 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e57d354f3c26644032c04c0807257ae8274539648eb0eb1bd11af94fdb047b +size 209250 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b83373ae29e1f64f2078308d756a7ad59d47871 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1bc6273981b27e6950e3aa732072fbf23dfe00f8c0f347ee1ef9ccfc6d8a668 +size 49819 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3e6ce88d30b418fc3a94a89ad140a6ec49ff8ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11ad50e183d83ff71cc146089c3be0cedf38763e8fe652888be858f10d4e518 +size 31496 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3080bf09b4dff5992f48a1a1d8a47936781ada67 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b181adba4366599d97c1943549889a0b5950f437367e664e8fcaf49e1024c93 +size 36024 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..587efb0a361017addc77f38602937bd2c8bccbea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09d98b0797206df548e40ca509fee36855e78f7a5e1077761cb2c6dbc73cdde +size 29457 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88144449a3185c72d47d1500e8efde8dae358089 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c596c277973c02f3ccbdd1c5aa45389af4d5a58d064d6553d7f7c64852dec99 +size 40255 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b3e0eb9a77a0ab639a3e9d314aed22a4507d509 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e4cb09a4b292f491442ce209446a90addb6d29693e98b00e8d0cb8d8a51f2e +size 26948 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c30d6e48f6de88a30935ff62af4c41b29af3366d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:728fab301282150b399ecd2791ce92712e1008789bcc2849b59e33ced0e6d7c1 +size 21681 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba352103d25c9bc2e77fb43823edc67a5fba0ca6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe091d86901ad2393a20a05b78795ceba884cd7efd755f864ee03ca1697bb903 +size 57380 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33c7f5c6ac7a5f6ad57db634e4c2385b95de1163 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95527c9b210d71bb35705307ae68a2f1cbbdeb2dc1143e8120d15e604b057d1f +size 22221 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84ef0e2b4cdc59332b5f0dfe08b9cfa0ba7e561a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6924349566458b341249b47d1169e7c48d40ddb50d37bef64b810d0d710b4db8 +size 169119 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f465fa1690659a88c2768c6efe6f5a0b5a709aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d84c1d30d0928b6fef4785d3d02457d721068641a21f01bfa95b5773f2429ee +size 90057 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe1692a6ca61096e90d56653eeccc82699e32a65 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86241fbab8bb0b42db8acad27d43754e24f861776eff2cee22d0eb1135c25a7a +size 154051 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f676f34cb494702df41df7ba0785a0411d76b63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b412223a1fbe8daefec20962c015239b4ee1fddf90d7fbb295ff339a69269b +size 79285 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cff2bfb1945fa267781aec1a9a1b3920f467304 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:367aea458a64c870262d9e15e040936c90ea09217d009247bafe45c70605fa38 +size 74132 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea2e2bac689c6b7217defeb163ea84447dba82ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0294de17475b63abfc9851a3ea8c544a8a5f97cd931b8cee21e247a3f6cde446 +size 82471 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c746c8c8b00e728756453394c6cc8f68b2371f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe63c9333132bf57be372c58129a44e17dc5c354021f5e54d2a524f4466ff3c +size 89755 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a154d8fe98483d77bb22157855daba83dce164bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bad74a2fc35d3e1c22effdc6cb6419703026580afc754bc6b0e99b7a6260b3ab +size 1003105 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f2db0ecacaf69169aaf547a54a56847d723dba0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:598334b15a64536f3228f314e09d40ac1fbc75b0adf95925fa5861c9c0281028 +size 132859 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..450bb0cc7b0b8de5a5d2d6275322c1a8a04c51a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e7a6f149c0a20c94eff0073d569e8f3eebabc43f8412950f1d619f8b9ae912 +size 182644 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba62730635ab821cda717890cfe891eef664dc4f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48fe9f90050a4c0ebc6d5e2bee6d7d620179ee5c690fc612cd2dd7bb077c1c94 +size 27473 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8605ef001bc13142ddcd09fb8b799c198ec5306 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39f2d1b624def99982312d27fd4fe0a7471fdc09edbca5f63593ca8bbc16c76a +size 120119 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cf616eaf6f227b5728137aba530cb6d2b97f6f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ed15d2f9ba7e009c4c28fc259fe7016918738b614d9e1300bf4c4daba7a2656 +size 57927 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14f7ac1d82a3968a502e6979078434bf88b9e94e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fa4f7c14f2beca0267ec4177fd4072980ba49e7d680132c14d22337ef5c6ceb +size 25409 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b015412b58dfa9bbfea6c1313ff2e82eb55698eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87afc4beb7636257d2f075ef2fba90278be8f8786ce6d01485a0e5a3add1ab83 +size 39418 diff --git a/eval-results/mmlu/0/ckpt_114/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_114/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..315e58054ef6052c63b00f3428d4fb7363644335 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb477d6cd653f42c8f1765921196de590af623bc8c5378428678735bec6ea848 +size 32932 diff --git a/eval-results/mmlu/0/ckpt_114/results.json.tar.gz b/eval-results/mmlu/0/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6dd686d6332a02d5d0117a96fc7946443d19fe0e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a16ea1044cb4939bd26acfb0cea0a4b65abe7c7a3c745873ced602fc12c64e +size 7609 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19d30161b7ad0802cd208eb74f1bc5e7cdfb1670 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3c4c5b765d05e5fb0ef613a3e002bfa701e701fcfbfb99f84987eb299cb58c +size 17001 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bc1587cbe83e3387247a73475905c894b45c4a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a3049b70ac7ae715b8e6d6fdf051dd8d05bbed95e7f90bc54c54ea1325b497 +size 29753 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19fd2b1bd25a853008d148e6f4a497c88e378a59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb0a81b3122bf9eea022ea4fd5d61d11f3f2f8ee926bd827558a511ca0687a96 +size 39748 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5997d0a8389416ed30fba594e73c00272e6489d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f079894a59a571e43ee2d8e31aa71e9bae876b25957b7f95ddc7df1b36506c2 +size 26716 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dae123c6e0753b4b0b55eed478f59beab756015 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae798cabcaf457ce6495a7dee06669e1a214d38ce029e6be60845fa460604bf3 +size 61136 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e710d7e5856f9fca3d9395b47a8bfb42710518b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47b6972a05dd3bca95c0bf69ecc2c40576cae4ae605f837a1d9d01d1cc969d54 +size 40267 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15404fc9ce324822b72ce01606f3c51f5e17d044 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63c724a6102bf10f88ffca53ba43ec9b966b1f97393d463d8cb6634164374fe +size 23733 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a841b19154ebb09721fe1b14c42a0066a7580c7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdec0ea01bcc62f18a242f53bcfc01767a4a131e7deedfcd9066b13e4fe3b609 +size 31024 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..447fde8282b24d8bbbe6ac9b35689841ad753525 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f561a224a0774f29a03188d4a8f73f9c7cab282b4c2201034942b89c4f176e94 +size 22889 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c015808b1cb505a804cc635a398f0bc22cc91e3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e770195564ccfff2c562c09a09b241425aee4892d0b6d9422f702ec9dfa044d8 +size 60828 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..440d7a010f3e62b8904f1ee89c23643f30b7a84b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09371526cd4f32d0d843dfd39bee59d85cca8a55fe0f944423372bdcf9f2273b +size 25673 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8848ace861c44875b1f739cf7a77541c9228e325 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:decc80e749a029161e7c4c290791f6c3515b902829601afcf06dd6e98f860870 +size 25751 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e0836260cbe2c7d7391b98561e8a067401b4f2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be355df963a357eb427601ea22baa2917395c756ba958774ce4b4b7bb117fb35 +size 46325 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..007f27659e98e58547d1ee3b8afeff40c7063b3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ce3430015a642c7900bf8de5bbb0ca239ae2e8fc370d54f381afab6a6b05cc5 +size 31435 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6930d96476a7f6f3ede5343f9a6f9897c583df99 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c924b0522e8a52181cb1843ef574bc1df1b1f9e9260a13e0b701e4751ea8c2d7 +size 28710 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..947425ad7e371f663c6e1fa54c76841fcb231e99 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f2b833c7088e8f9845069307fb6f534f460b9f1d6d834174a11a3bacf62659d +size 74389 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2076c30226b5be5eae8930adb84feffd2e9231c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:889a6db43fbc963251d88cdb076bf668ce771e338e0c18f13c78379a3bb15463 +size 30043 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55b0e769efb56ccf90feb8b015f3713601f418df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf2c8948c05c34243bd53a24c1aa5cc5c4b6f50f5ec910855ffeb64df6258e2 +size 19062 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16ca74d3006e94690d010645bde0de363061d984 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73416ede5ece817e0559cdbc0eef34614c666e43f0d526621587eaefe467248 +size 87817 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59ac7f443ca0c8a8c375f5bbfa3d61a97d863366 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616fdda5e0b7e3529f96a49d911e40d1e629374c053aff8e321db2b489956150 +size 49866 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a28ba41c4ee4ce0ecabfa4f8c812e4dfbb57857 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70a4d8d2be6569697dffa7e007f0d46427b00dcadd519f3c4cd4837348edeaa4 +size 31562 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce914c8d57a995051a753cae9510941e778029e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:085dad0f632bdb4df4b88cea86edcf981b853beebc93bf6b4f6a54657330e831 +size 145332 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98dd0154de7999262d90a183d24725b06ad2bab9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e96112227bbbc6a9c526e65bf3ba6f4b2b49e56e1bafa7b937521604d175d58 +size 44500 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebfcf5c7f299d99e7d1be8d4678bd6bced2a3e9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57deb4a138c5bf27edced11e0cce8867620cee055206263ab317a5d86f13cf6 +size 54413 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..078cc1e465bb99175759e29d83308de1bac489c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91064753f2ac57505f97ece0ef9cbfb4805696e20c7f1c11289aaa24bf855eb +size 92501 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2f2a2c2eeac9f826d6982f09020800509a8a6a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49141ec7b0410f6f5b65c0265799ac7cd03033047e551b571659a8dcf23ca154 +size 57053 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37b27cb08dc57b239cae23b5b4583253f93363de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e70bc41f56993bef8b79d90d5d38f0560a6e3009db2680f63cbe0da84ac4bc7b +size 58341 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3e73e38ff2a928b6d3942d6cb5b3a4bee49f9ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6506d9f9a6896de227f21d3613162090b1cd06f4be0353c8822bd5f1014bc6 +size 42758 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cac0f5dd0fea2bc0624c68eb2498114f6282b29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518551fd37ab6359117a1673c4467ff0ed87955add53e06e8b64ddfa26746f80 +size 143817 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..209cd8c4f90e0f9525bd3a614306f7ff838d1984 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad136e121b5092e7d19c2a3d9500e320ae12b6c41ef71d7ffdacdc947cea8bf +size 71021 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36e18ffdeda75493dccffdd9f8e1358538835919 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9caa735d39ddf00bc33a7c67d4ad9ec9a89955c472f59fcbba68bb9508254023 +size 161746 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8881a790b11eb3b4cd484183e8b9795d596ba86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b59493c76f53b8b561592b692a51984206396765cda8cbd067532ff42bfe5b +size 209220 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2496a5e27613fced12b51003dd155258ec3e2f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e334b1916af478779bf932c4088ce9ebc27e7e40f48c2574ce015a58b360fbe3 +size 49796 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b712180f353ca04490c3775b53950ca5c29d279 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:684994d4d0190f3a8d4d98523bf3d8fc94071aef501edd0958fb66beea0d6a3c +size 31453 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dad4ede355db4df8c7b5cf7ad09ef327030551f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a728572758fcf357798b43951f2e6a108f85bdcf4b640593fec68d89b1bb28 +size 36061 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..210f3b04711d1dc8057932ef2838d90222ecff0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c86a7c793d10df791a52c86b21ff1bbaa8b797783d35b774f020a859d9e5a2e3 +size 29415 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0236d3f154aa40ad542b84a3946a03f2a9744a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdb7bef373570aead1e4d2248b2e0427c172d785ded208c44087014f7209357b +size 40296 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b717e1dbf1f57597d8fe2f48149434c7227d94af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c0fe6af275b6b74bdd50cdb62a9f39e19863240b2694ee01143fc123a5e899c +size 26978 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19feea9510470f58f1c6fc4ff66f952eb3cf3914 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb73c0710fe83749fc094bac24804aadb680aa1dd24111d5ae3e21dae48e778b +size 21700 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b5127fa69c10fab3f0b61febfc4eabdbed0c3e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f524a9ac250d17c95081af4c359916aced52f9defa3d2277f5fd8acd51712c0 +size 57366 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..001fddaa65fd654dc2c5b0e81a1b9303aa5c26a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65cfd400f90017d3e10c5a1f315021770e02f8a9bc05edeaa6a2ee21357d6897 +size 22219 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c4015d4e6601e30f64d701f8478d5121df32d9d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7cd840de4cb064ead97b43e61c2a00895bad9df77a1ed3fb0ce78680681e2fa +size 169239 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..682fdc877571c2dc96e5fb969dafbdce5b69c89c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e3211ed54bd3ca7fc5388bcfab801d32aa982e5fafbd4331c1a146b73c3b41 +size 89993 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1ede9bac113ec3ed35c66490ac20dadc22e9120 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fab3ff7969081cc36016c5d7823b80554d4b4546d1f96e33aa2cb31bf932673 +size 153590 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..384c369dda391e6592f2009edcafa97cda28ad0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29aa38f350e5a419678684206c0496c1ce46cec91fa29d069bcd1f400fff9994 +size 79313 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ab3506efb8f585dbfdd21d89e6f90558c28be1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1ee1944b5d150d8ab6f938402e9d2cff1d2dbd71bb532191af3dd12eea0260 +size 74228 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ea5bc3094e9c722569cf3d0f41d951929d975eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0e665e65f10b2479502b15a7fa9a116edd59a2408e7ad39458c08c8a55b69bf +size 82490 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89b04d63ed5bffc193b46821b6a12740f2a044b2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116e6d05b9904e3faf4f1c63067fb559b34a0fdffdb691d6a5bb445990eae904 +size 89679 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffe3158d757094a53740d0363d6eef86a32ca77a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b2c60f2eb48019e9fb962e4fe5ef8715733e110b445f81c0a0984ed1103c8aa +size 1002935 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f2869d52a85604cda0011ea1ae54daa96aeb357 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d36ff0ec59d35bafc86ef1ff87bb4739b6c41001b6bc4f7bb7f2e6e3d04c9d68 +size 132691 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..284d8221f9cbf6858df10b3c560e47d174dd5a9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc070504eed32ace731ab3f45f3ab82bdc8b891f06fc1d07882a1d5411b4a484 +size 182683 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71150846020eba989609f09a86ac1eed514eeb56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5c19c5d4fd25aac1a11c6fdcce76c766bfecc62a90f37791f0380cebf7fdf9 +size 27487 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce6d14a2516c8ee321a831770e24d6f228044bd9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16205c1213b4e0c75bb9aa82728581d22dee848b77fc35bb666d1d282a36aa4a +size 120207 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af0307e5fcd4273a69bd8a9211f3dfbe1dfd4309 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf6a648568214adb3f85d5a96a936b48ac82bf88c6305f2280072ee5af332ce +size 57961 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7f0aa0eec8d6e96baa32344eae9bf8703b9489e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86228dc2a88de26776e6126fd7567ae7ea8b1e14b11bbd236a95567d1efbca23 +size 25421 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0043a20a5b9f9d7ed0906871687834c4374b84a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d9d033377b6fed89ad1174ab36e07cca6490e2dd57e6bf8d7cebc2125ad173e +size 39439 diff --git a/eval-results/mmlu/0/ckpt_117/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_117/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d86ec2ece2d82440eeb9c96aaf9c37989d8116f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd2870ca7f6d0ddefb10f29e7f67a7f01f32afa96c50c2d1333e850f016a5c68 +size 32924 diff --git a/eval-results/mmlu/0/ckpt_117/results.json.tar.gz b/eval-results/mmlu/0/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d324ad7dbc1f721a2db45705629a5a889ca01e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:582344f9d896c7725914938427b2beccb580c33699398b60e06fd70d4a640f31 +size 7616 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebce3e3b86c964bdeda072d9f1419073cb4e89bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10df0b7f897a1f54d1b6c5e506cfda4fb5c8057a918148ed6ca03d2923303afa +size 17021 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4fb91639897ee374029c812f588070ce0054daa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0902d3e0b92f0355dcca5ad511f39b54ac14583bc93e01a035abed14d3a221c3 +size 29775 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b85c4d11bb19c2a458c1156cea6af51861e6f9c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb49b335e846d8f7f95d6328957671e8e77cbfb65f63e98d08e98480161827b +size 39723 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0e1481a4ebc489982d10e1c1f96ed01b7f4d98b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156476e76e7e994d4c3703f180fe5a424dac15e663a156944d556b6b1a5b5ba4 +size 26654 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e6b03af781fe3d75de860335ecb1594678eb057 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbcc06c961494aac8f1e3296a98450feab84255eccad5c1809ae2894182953ae +size 61057 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d906802c509a9a3041516fee0a410d953dd8923 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d13376aa137cf28ce850ef436657b3f35593f0a0e07d772515e61209dc8177 +size 40225 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f226650635a7cfea4f4ce95260312053219214c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b33ffabd67b4025041e08ac6dc6116570af83fb2806809dfefbd2dde93c1f12 +size 23683 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc564d68673cd02b08e16abdf7d22c0888101142 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f44a540a0895f9de452a8d64d5d0f58345dd4aeaae3d5aa4f0f5a59676a6d6b7 +size 31020 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b7422558ff473c32df2067c382aab3a83f1098d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6a2ed6fbb32d39dc60bd641e3d564cd6ddb5fdfdd073fb5ebcd8e8a7e39b875 +size 22872 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4d00ddc1dc5d0630c645ece6ce7ddf58b6658fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98496d99e3a1429fefa6fa78e83cc0cd6049e0ba9bfb5572a2d527075f0bf056 +size 60746 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac1e56f9da0956f32d9dece83d920815f0e0f017 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76caa88ecc7689fd46cc1c795aed879c4fe366b24b5dc429b6af583507591d3 +size 25656 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..027af5e38e09fdd742f486936138e50c089b5483 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cff5fb52e3b75554f29f2aa4a5760682cc7c7c94aadb2027b75311e8a7b67b +size 25734 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87ac2438386efb3a8578da4bf65c83c5605eb583 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:863c3ade79b54c617edc0ed35082d5ccc665203fc0c47c98da5d4e2f1f8f6764 +size 46299 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00d7f6e5f9e2339b08a1fa62e04dc72bb1d0345a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4223619ac1b6a517a89e42541da1d870c734ac7c6315f6a6331398d68aac06f7 +size 31414 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75834df381fe20cfb7586558ad6ebab397762bb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0592981caba9f3bf8ec6ca915460e3911d85265c68a7258918e4d90e033583 +size 28715 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7dada9023dc9c6b6beba6127cd9bec25196483e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4d5f59a7d36e74bc1726204920b34bd14a2515c16d30118ae27fe38cb33382 +size 74299 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa352c583294a874092398f251b54b1af0eeb31c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00753f0a0d3f32660407d0bfc67db53c3e7322d50de0ac7a2a95207b79c39123 +size 30015 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65518bdc425d3b08ab98ea7793147f5e3459437a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:412c780bfc80cf1cf1ad5983887d09ca88b2f49c481ed467095d8af17f1644bf +size 18996 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdbe4c5bf67e00bb8c69b796d880b767b32cf245 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12cde24dcb05f6434868032b57f25ac019e8f6c8dd19da763e547bfe819e22e2 +size 87759 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f0892844028a7a34b4f29099d50189ae7e01138 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a187f7a5c2bdfdab8d110c28fe28a0907d6209746a5ea7c5bf0b803a59aed0e3 +size 49758 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4de0fa150758f4d6247ef7e543e098529fd27ae2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af7de8cabdce08a1602d0376edcb38af8f47008e6b9977bc5485e13e66095a0e +size 31478 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e23fb6b0308aeda5a6a5ad69f83674b563b9e2fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6418e0603a5c83c90992d674ffb809fa6d80cc0b0d70d0c4258dceed84fc8079 +size 145118 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71f48f3555f685838a7c8dd534acde9c25fffc94 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e3659fb1cc236bdf3d248fa9b8d61151dcb6b5546e400add9290c27279cece +size 44464 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73561d1194e4e36b65c77fa0360115d92642dd72 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a3a0031991d65971767a5cdce954029f10f1f216e71fac9543441e1a1c2add +size 54332 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a52fc5b3e28526bbff90604a009f04c58697a31e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5aa52855f1c185d41f2ca6d2abf846b347f4359a4948ae7a8fee04e4eda20fe +size 92375 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4412d7a0aa8ac054c46e287a70ab2ffe91cae61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12567ff3a8ef5ac5202206b7fe29bc4faa14e7a084d6a6fd64071b54df37502c +size 56962 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4920b78fe19e1399d0fe5d554a8c12c62ac9e624 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755300e457794683f1fb19c10cc24cd412d974c872ca4808d951a384cc769735 +size 58255 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2924aba42153a4a10b92235c77ac6efc4fa10284 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90dcc903634355695a37ac518408414451c070ab1b766138c02baf61a405682 +size 42697 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb26232b2b2e8be57b753bdd7e6f04c2a05e9e4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76d6d4402139bf905fb0be24d00f9f12b69682d818affdf51648152c7f671cc +size 143649 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15202c821a37b1a8e4f875937677d7b519f6792e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101cb345bd3ec0f511afdd041a0ef25cf79390144a871f5b0375eaef9f624200 +size 70959 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8e0731ce2194d6694de923dbd742c34db1bd169 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e8e549850dea1f4c6bbdc9c0027a99417d289e10559a9bee9a01abf557cb4eb +size 161684 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9e9fe36acb45bf98ddf83a5c3d16028add79347 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1dbc1dc3b84c4322f201bc6846afd0b8b746356aeff978eaeac0656f9a5649c +size 209129 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b31d507606bd759dd4a7a71571029c79b1120826 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:402c258b308b8252ceaa4fde155c9899151987f39149ae82aa9b3222c061f765 +size 49701 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..030d927fb4a4d51b34987ea44fc08abe352d3c20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd1f1e670ed65b4edc79338ec39164479a5a69e990aa8c6bbbd6983d7d818e6b +size 31431 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..037eabf31f3d477ba3c3e2dda401c4b154096f38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d5bbba9597afce2dd8a5d56884b8e63a1c7bd3b3e93d78612e9c578417ec25 +size 35943 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..546afac478ca4a8e71fc704c01a827737dab0cbb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa88b5a3aae1a184d682f71c4a4783791f39638697fda91d10705c8eea7d49bc +size 29404 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86a6e95542e35ca588fd49efa137d013590d9e06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994b5919f05675f6c2b8bbcb2f50ee3275bf01650ba1be4721a15c26d508e3ea +size 40196 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a0336793fde824ce0263ba0edf006a8bf21d450 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347b0247c697b1d80edc7809f5ac29224442fd3759f6eb6b791d215fa6dc56a6 +size 26951 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83114d8498bbd3afce1c0773cd132a8da90d824b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a4fb8130f0cd3382d9b2b7829dd8bb6f2d1cb885e8ad58f508a5ce91e26afe +size 21646 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc15ee4019162018354179e523ea19de16895670 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0aef4ce129219c7a9c40df8367fea75adceb935d67d93fce9460d39f32a067 +size 57336 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab66100b5d6cd72c26cc9c8d7a88cfe8629cb56b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741e29205b10a07afba9e588e56497c378287386310aff8265f6c93aad22a0df +size 22195 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb3107cf4a2820c40fb694e46353f5cf2c0bb34d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb29929910aa4abf1ac757ecfb9b2d741ab44c0220b3541cd719f16f278493cf +size 168934 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0ba86a224e00654da96a15b48f9de3e442df8ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6c9ed98b38cb3a05c7e099eb4fbaf785c00fe99237b2e7c503eacc9e55460e +size 89975 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62de68f9f50085ca90ab2d0de72392e884ed79d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de2bfb67441f393811709868a1dea41aa914d11772239c8425202a9d744e0085 +size 153775 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..045a3ed38c7c666f026fb018943cbdac1bca9fcc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd88d938b3f843c2e25182618adafcd837d6b1f5d93fbef0735d906389dd082c +size 79207 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c322a01398351a86ed634be81b6d26413dfb476 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61acd751fa46c241b50d64fa3f3b27bf122fced4d0941541c37acbfa5e9eb415 +size 74097 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..159807f4dcfee628af2ff4ff2b4a93e96183e26b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b56fe36547e0f6c327839efbb1a6f595f03e7783b74f520fe050ff8663c932d7 +size 82356 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ec3619d49c0e7d45e230588ce7df37f0faffbc5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd876e97daa7b56bbd0d988c2d70c09f3c5991d2b4a4c347b89d0742cd321143 +size 89747 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a86eb334891a04d57ecf57c659a1ec3a4c00e76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929f49bedefe829ebd0e05caea74f4b4ee9fdd837caa4f3e7756ece11896cdc0 +size 1002829 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bb44b87ddeb8aecb933c58945d8fa43137b7790 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a402b2bd627ba207ae4aa53ab66073189ce6e5e612d2aa1b82655bcd85e83ac +size 132715 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8f19c8a844402de02443e897aa4f80646fdbcc0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890ed6b1b8708e8a7934822b785cf6e65d5595badef1b7c6aa6d12ec08042408 +size 182512 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e595e55e5548c3996bf5a7a76d70f854e9fbadab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0960191f467dae1e10112032dc22664ed703d30524713ed3e08b3b9b563da1 +size 27461 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..134abd79a2684342c6b4e40ae377955e8e9ebcf4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18da79bdb7b9a4d12e8557302a0b340e0aacbbe8364eacb3ddf7d9411696f65c +size 120154 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..413230e2601db5d4807cf52c9364f88cbc5974df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c83e9362f5d68ea045334c91520134f7fc4f1bdaae53276e48455f97feeb87c +size 57898 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d1e01e31d4724ad3d516ad4e13fc1f2cfea6ca5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eeacef29902e3e143ecc2e729ec4c0cbfbfbd7a596e98f3c1380d63ecfe0def +size 25373 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39413e45b3b1bf3584789ebc4c68b27a1231b2ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3f6d5c3408dfef7151bd7e517657c0a8c1409dc7a29737898ee6aef9408c2b +size 39360 diff --git a/eval-results/mmlu/0/ckpt_120/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_120/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79b514fbfdca4398576f2835219c11f04fecc498 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a61aca8b0eff505f0ae82b84670a2772bed1fe94d1dab6f1ccdbabd3fd633e45 +size 32920 diff --git a/eval-results/mmlu/0/ckpt_120/results.json.tar.gz b/eval-results/mmlu/0/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b21557f5acd737a4fca2e0f4e554102b1a42ea9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde6229abebc56b512c5a30395bdbef6baad988b13888e581cea9bdad76c1cd0 +size 7619 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9b53b13d4a41eb4e7805a8bb0354a7f46cfea36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5f4bca461f43f427ebd854fa584da37a063d8dccb4f811421d05ac4b8e1221 +size 16995 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fe8bb02fa51adf770a8f83c64fcf6808e2b31de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a565e63682529d6487d495b3704ec0908c1427c27397692f992c816bab31fdce +size 29745 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f0b32530b2cd50b47c08ac0f5bdba639038c8fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aed77eb0f798b5cac5d1580e9693934397375e31f34e45ae5b0c2095dd1f39c +size 39758 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7a40c151ddbf10b04fb32fdf9555d634e996f6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0399ec123cde3f7eef805ab74b475abfcafceca0b452aa9f2631d852114dd35b +size 26683 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44785cf857ad44c1c17c1846c9252d89b13cad06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:835cea274606645092f09bbe1295d205f3ece7d8b26d0a6888d416d782ebc23b +size 61102 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc795b914b17ebbdc7d3856afc7914f598f410e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e04b7cc0a0f61c242f52f25b2f53aa97e27070b69d01536755e57f863e6aab65 +size 40299 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f40182a05312ac6a527b49bdd87bd1b5c4389fc4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7058b9a8daf403c730975dc8eb2c3d017088bd1b72f6a1bd0809c4e34827ac +size 23714 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..719f8d75380e3039e204c8a9655fdfdc8d8d8bb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42c2d6a23a1c122f8c2227c82a0ebbff38a786f5e6ef8a693ec04f346edcdf37 +size 31031 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f99e7ab3f76e3129e394b5412a39a7082ff93121 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e90b31c2d955179a1d232129a1f79dd57fa2b23e4914b11033686a5f61c997 +size 22894 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37f00ff6c45209e573e329422aa8e5f7d4b57450 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8668632d38459332176553d6f0a4d26632dd71f23bc9484bad35d9fd5b1fee8e +size 60774 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e818196ab845be5d0df9ed4a638b8f57cd6414e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d912a42ddb317ff69d88eafa3b125402a233d0264e10b08869f07f27d5372042 +size 25598 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26bae41c321a6a38ff4c8e4e5099101218bdc511 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fccea38b2cb1807f851ff88d78f5679ac8a150cc771cb3bed5b62c590bac09d7 +size 25706 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a8e6ab26438cc3588ea3600c66a81521dee37a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d368ddcc87ae1735819695b8894ea2d69a07e2c3000358c18964cdb0fed6c54f +size 46305 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aae3dc388c82417d68b65b24a8fdd2901a9fd407 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5a71084b9998ee7d346b970d2bad6cdbff95badcbb77b72ccf372955d32b350 +size 31379 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1942036121fbdf0c130ad087119fa649e484dbde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50eb6b8ff75e17039641b4c4d023db8635657dece2375babc7e2ee0bc9ebbc6 +size 28678 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ca9a8bcad27408ae9983b5b51b35317a3e27a13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c483b649267ba5947f4967e61e2b3ed6ad8198ac08dd1c04d29c8f059a1fb57d +size 74249 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a86b7f3c7b6f85cc17cdfdec4340b80ece75bff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba11f95c5717c219bbec16e74b04a46113606bcb72819428e7d40f524cf034ff +size 29979 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23e78ba6170359cf9d2140cce5617395ada0e965 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c09bf0f5bed2249cd6d1794b2c07a6736acce5b8c125f387cd8394c7fb3d186 +size 19059 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a12274677464eff04ba8b6467d59e409f7ee8abd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7a43068ecf6802af0eac5772f97c881b15fe4712e007749676965404eb3474 +size 87804 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0e34704e9fef73fd5be20d3489857b8030b51e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:161d60045a93146756fe5acf8b050b8b2dd8337708084d8b78afe23c173ca43f +size 49796 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c2cf229c0b99fd61e53e30099eb499bc635f303 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deac90bd9bd42aaec3200495897b3adbf712ab7559ef424910ae2745ae1c8313 +size 31507 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c86e08710da72c3a26c5e158a09c080d46450a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a5352da71f21cc09e04ae1071926fbd8c19272f25dd308624e8638721a0876e +size 144971 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64f133b19dd22c4737b13d6751106e337d69f87a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b1e50f86aa67a391855203679a4cb09a7bce31bca90f04db042c9060f2d71b3 +size 44453 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c144e61d084a37c541b42b61083f1a1d9772566 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b653f8579dffdfcfb1cbaf9ec15abc1d91e751eeaef08efca74923d5a6c14967 +size 54345 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7da2a7d5244cd42d6197e0e136034458d2fe978c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec0ead2e0ec0c52b8e3ad3a3f6e3806c059b1acce612404c7bd4798bd9a202e +size 92427 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8793c9737782de1ec0be3bbf7920018624b29af3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da179ab113737ad91ed5ecc449eb5f6e5182d71bfff2c51e3e1efb9984494fb7 +size 56958 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc3c8f37026e81114005746590833e8c1c0a1494 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7ad940286005a8412e0b68a926e8b232b7aa83e97e7a1933c70ac1866f48ea +size 58296 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..648cac3066deb1801ef2571d3aef49587135b008 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c161035cdc86dfef313f3557183741e66afe18124e50a9e5cd326a2aa11d7b3 +size 42790 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8c1cf932b6b49d8711acbc4e4a5d2e80ba60d23 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a56989292f8d1b6c178153004d04cb732b747debfa43abe50cb376ad162e47f +size 143639 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..145793627e8729f696c091eab2bad1dac6419267 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e666e450f5e3931742a9d15e50bb0f4134b810d264a694797319fbbb2499d724 +size 70943 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6df966a32e17f53cf19e7b01d2cae03a8c96aba6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7750b57cd8e7c3bc3987b3bfe1eeffad9a2b5d16d98f87c713cfa947e5bbcacf +size 161670 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7154e4a060d6d66d05afaec179337c7a0aa79bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ab22c4d30cc833b64bb8a1df2c1097dc07eb9baaa03d287a15ec86e5b7c4e47 +size 209045 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b66afca8e8713704c79fde2250d66e7d6f99121d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775156c21a8d80b0acd58e91370d2708bc46454bc2ac7545236efe1132f3ec79 +size 49754 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3c82a96c93061e1c67211079c049bda90b7e384 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d467c6eb799ede8ae87051af61d6051381b30b6533d763a431789c7fc9710c0 +size 31445 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88ce1304fff3935b384b0b71890a06326df2a52e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425e5241195fc6f706e8cabd21fd7efc71dcabb1cda9d6878b7edc2f18832484 +size 35988 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e45fd89d26bf7d3d854820bd7c534d79fb4aeea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95bc4daec718077beda6280410e464289830b7b3b3120d7f3e63e03b29b6fd57 +size 29403 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..058749cf31205a089408119ca27fff118c0eb55f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13f686e538edc86e20b5c4cdadfdac7e8ffe2d4db3b30d05b28f2bc86c4e3ea4 +size 40206 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c021d632117fa0b894a2dc26c91e64403ab9a30b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b50a05433307c4c0bd219254ce355aba6bf3bc1dc0e761d5758b3c8458a27a +size 26953 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5a7546791e1bac6219d243cc8f8370fb8189d6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf4aa5e02b53ccf5c5b88181fe7c905a848d0a274195c0af45426fab9f5c966 +size 21678 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b408683d432218c34dd1817d808418f9202ba24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ede1f86f70226c973476178aa9f5edc2104f8812d4cd97dea2fa8b3d417db64 +size 57321 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08fac777d27cc17ad024fa2684ea654a61340a49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96fbee25db2900b7fbc424e2bd93c0f92e91ac8e7a37131b5455c9e031f442a +size 22233 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..230305a3607d4c801064b66aa46aa55a5e5533f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ef4a959fa66d11ecfc04012cfd95b43bca1ae88af12030c57edcaf830c60fd +size 168988 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5cc7f499237bea27f477216596f5699564026da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b104ee67bd0447addda4a0e14aee776e6b87093eca1765255e0187f4489e1e1 +size 89846 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df1a9f7371bd5e85482de955873d4f7be4d0e668 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a8de17b498e9cad812cdfc20b7efc3761ca3af6fccf605c48d642821840209 +size 153663 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16571debc6ca8f61534c0b22b250c27025baad3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a345ecda48902857803c9858a79361e65f2bf2288a8f53ffbd783c0dab1f6e4 +size 79256 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bf95201637b5398691c4bd4d2134f254abdbb4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f8ca4c84c615a1107d927556ecd0e7ea06ceaa743f773314ea1d124ebdd2b6 +size 74063 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..330ec6275c663f00f0a389348294d6c5284dd5dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab11198eb2f745406a4d75ed5c103dc63b480733ddcb96d701fad2efe32c3b5 +size 82367 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71c563213ee1b323272d0097f541e0dcad53e393 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0234e4c63e6444ec9d03c42e4c92a4f35340fe3e0f27ff04bf7ac8cb784d211 +size 89710 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc5fef10f471172d48b1fc547e3b84d9db568a5e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db8fbc6600b3d4cb4503b7bc87eb09c315e77c517090a62992f8329228ca710a +size 1002618 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acbafacebac6be2588f4b0cc3449a6e8f3b92e36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b31ba375b370c7226130936e07e1d65577e6b389cabdd7b2c86883343226025 +size 132753 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..323c19df89d7c534e79322fbc59c01fa858e5fbe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f887d0dd8ba7b43fba7cdfd27ae3da5b7b4350ef0b2cc34dfd54955bd6288bb7 +size 182559 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25c25bdab9ea3c7e3b01ef6cfbadb889291f49f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc726e1b01c66323a41f125bbb40d59e31478532f19bd7e36a81e92acbf05a3 +size 27483 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5916a4555c934c758030d268484564d021a3365 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe8a46e3533b42b5a36bc737d5eec041ca60fb94d0fbf57e869c77958fb4d5d +size 120152 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a612c6414ec906bea2f3071e2906b54e19018264 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092186d3ceb5ffba5a93ffc25dc4511f21189850cf498cf85fa56d27b1cac70f +size 57921 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30a5117f322f18287b6e9c9b93f59a9042caa05a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adbb2d81c6d112caea855e4fa2d70b7c692d347cc857a2f31e84d4f5a752159 +size 25369 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37f849b9cd7a599e7c692d9cc4e3c3bc90c2a421 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf08046307c7894cfe88b42f5f6463eee330ee4fdadd1c97d32982e3acae83d +size 39401 diff --git a/eval-results/mmlu/0/ckpt_123/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_123/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fd7f7e372cd5140aecaeca8898c4a4db49f4f82 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ccfe0e10eeb64326abb238e34407ca230c71c050b823a32b284183189fa6a06 +size 32919 diff --git a/eval-results/mmlu/0/ckpt_123/results.json.tar.gz b/eval-results/mmlu/0/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c244fecd1480ba4c0a21bb82fbd8105a9bf4008 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d0e27e929cdedd3c3b7fdabf20ad91c5415e6bd40e062230a99f0474b6be63 +size 7626 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4b198c63ae5d829a6e287ebe18e9183e48af787 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eabc895a23d8804268b963ed38b390c9de9f7750f4ee59bbb6939c5430999bc6 +size 17006 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..099287b7a17476c30454ac1e5edf0a3fb521c6e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bfd216d106cb725ab94a87d2d74bbf4da473efe5aef019b2cc6115ba99ce33f +size 29761 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1010f6b36726cf8630c10ab11cde4e62aa063067 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c19159524d4a1b9e19cfb3b907bfe401cd65f5244227fd58cdc88e9c7db84f6 +size 39760 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b096fa7353f654cee99c777592475f238964013 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee4087779a106b225e971b795ebaf9f25344b11df15d128867a1a6775666ba30 +size 26746 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afa99f27441e02576bac576be3cb0e574e2aa861 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:071d5f7bb5b2fc2ae7fca36823a1607d45835491a72b6b2f2457050c70809838 +size 61160 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ce8305607246f27d28800c427ac42914ff59a36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7b6716ebc82874ef66bb3b7bcf7f2504f7ca1d8a8cbf3684084e0dfa0ee690 +size 40324 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f1ca822af616a21c7bc3a062d04143bbe8fe1f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:766324018c93190c0f26215925372d34632332c15337cc5632406020c8fc3496 +size 23717 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb939b16866a66abb349b7366228b0d90e3059fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10814689e14c8eb6a21bd09e6078ccaa5a348561e72a33d4d29a9fe699e1576e +size 31023 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c84817aff71e178054a14b3c28d09866437e45be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e338a2cd9016898157eef652e4959acb904c1b9c54fcae52868693b109ce2fc +size 22880 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1aeeb4ff06949c6f9750daf292d18a07bffa1a81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64789033f92cc5eef81733264929a96eb95511902a1d8b18844c29aabb1f20e +size 60835 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c442f5d96dc8d076a70c85beee5449658398455 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386b7c3a0b3df043a39a85c2ff4b5c6c62298640ea3a5a661be757ab6b2bc4e +size 25662 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ae558aebce574f2eceeec8f3a7766915487141b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a34c5dfe00847739176c5b7be9d2f58fc5a4e97358e49652049744c08e83fdf5 +size 25760 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f85e8ea2cdaef968c4d92d03c775f6647a0aaae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:054507fa43b1974ecfee336cd466da0928632f3d54480ebd8cf9b41c42b2dc4f +size 46427 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..257396c4dd29abbf62d0752fd2834b44c92672ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be09bc10a46519313f9d697e7f568185d314f929786504356c4b2613310311e +size 31406 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7ef903df698fd3834cfa0b28a4d4587e33a464f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e4ca18a5b044d032cc26822db9f5a2ae67fa91780b11f2f97198233186ddd3 +size 28732 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbf2cae809ca286968d4cd1e633a0ae4b691efde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d397610c572cbfe68e57e372a0f315797840ad8a64a3b8bc24eaca5de73589e6 +size 74376 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5586fcd882a49df27fd3b9205460d9369f18e5ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b4ab3895957c930ed34c1e32001b5b67c78643228ac6d76c0300656f8a0e0e +size 30026 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7834a7d85fcf8e3b685caaf61892d838baaa96f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffd739cb9feea1a1b9fde1e1427a0cb53a54df74408a9370e67bbddd8ac0e82 +size 19049 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea174cd9508088ec97eeb3c2a6353bfed228fa06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8931cd603c0a2e26318c138222dc64f83be675231b4e486dec0179da5d80c449 +size 87844 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d6ec87b9fc5b46edb30babd706d995ce2fcde74 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec493afc01fd3d31d7b97ff0d52b2c9308795feaf613dc1324cd536d9285b892 +size 49833 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..503c5d2d98a08601b072b796faf104e9a9414afb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0090928deb9f0d959c82113f0b232cc5dcc99e8978aa8c6e45b085a79fc2d0 +size 31537 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..621ef37feb067676d2b1737e9037a0130ba03417 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ef8395dda7652ce5cefff59620d7f07a859c95a1404d706e5e7f4825e351fd2 +size 145197 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a96bf903231913e66b88ed9e6bb9008e16b68202 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8884b5cc38c6ea2f05aec9dcd85507f061e9c7c26efb1861da5969039303f608 +size 44472 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24b58c7b2c6fd6198d15833b4a660bd200c9a68c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:236a52b8dc9156e520e59f9edf1c44ccf5e437dc7901e1ba12c8108cc7a7626b +size 54342 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58a5736eab596145cb6ae99ba8758fa6c483e8b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5c67719cd7899577af0b89d6c227cde58469de991d732d5479738de2ea127d +size 92456 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..649d96d250009566d8f0c7e3b8feb024cb33c4e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af55e1f801b54494407ac9298ef7a7cd0a95d212c4c82534530146b80235994 +size 56945 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0b32e975561cf1f395c1bc5b9d4dbba7c46bb5b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68715fc61509f34f8bc036b1662b211a330c892790cd485d712acc3df733ce56 +size 58405 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad9e3bfba960971dafc6f9af4909c926c079765b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c4cdf5d08846e1996f405ba0339f3c5ac82268d8d4844b1c39515f959efa9f +size 42797 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f19d5ff15343428227ac952a1483baffe217247 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac35ef4f5e489a3375b863fe7d97f962571b41a54dbcf548c97a49f931d8d91 +size 143835 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..551ab169e21f84f636de60cf105e7a113913a77b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e6765c58165bb1b7e82dbce333dedaa136c8c0ff99a1395279a4e2e2f4cd2c +size 70906 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4acfe9aafa47abfa9634273af11aa15805c3f0ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27daf1a4c7b42d47ea90a48e80aec20a99477b24d5be5814bb091eee2f81197f +size 161823 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d2f91b4b2f33146a502d3f2981b7b13165fe29e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb3bd27d2b6bf02935a264cc88ddb3579fc227cd7142fe5286f6710db062167 +size 209190 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5ea60c9b6367723d90581d7f88b0687d690092c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d058b481c72333feebd17509be21573f68157539ec6e9d076531c0b9880ad1 +size 49808 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2880e4d9c2dbbe682c322f533d10850921f1597 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac1a24df9a1ddb3124d40a766fd2e3b96e72a750ba8f9cf1cfa7fb74fafd77a5 +size 31463 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..616597f77f3324bf93e6b16314b93abd74a5b490 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb8bf1d56e7656e26160abe403e851ba3f989c1ae082f34ad79d8e895e45154 +size 36006 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49738aa4c6b18e245c16bbc677defb070981d8c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c9a08dda4c9547c3151a765e269e1b0fecf67fd2ef140be9519a45504dad243 +size 29399 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bc41a9353691400e947b5da431e2527ed4f8bd0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86cd559d052fc037bb391f42e34e6c7e85ea2280def056d3a641615c4c8eaa64 +size 40219 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf17b791c91302368863032376393d24784e6584 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e976ac57b2b7671f56cd3583490446b89aee32454bacbb5d504b1ab188bb04f +size 26964 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3b44efbc8723a885038ac95d83e98d3d53d17ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e772898c0d0b96fbf490aed8c33c1b93fe03cb6b66835a603626562bd83be954 +size 21692 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cb32aff6488a42d638b0ecd21bc86be0a43de83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1b92cbdd3f73e4d221fb2b055958aaf7800a7682913065b749979bc0d1e11b +size 57425 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cabbba72d4be282a318188f92a4fadc59b94334 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c8ec9aaa52404d826a897d00e7237799421cf9a9b53616d8b2bb6d385e7b8eb +size 22272 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44c54bcbe58763a21639d13b822d1a0c8af6f88b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331507be12cdb7daae1791b7409e5ea5cafe280594c7777ac767e7fdd92ec1a7 +size 169174 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10608443335302f0035c6f1d72ff810658559f27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e958de8f54c886fb1dfa489a5eae0c16c14a5e1c6d54a6b858eb45ed4976d36e +size 89987 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3da43f5a8068152a919c23088b8d8fe45a6f8138 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8650a0421301fa435fe9fb12d0da08b309a8af2b6ea010e0d99ba3a144f220e +size 153325 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5ae32d7da6bbd51ba3d712797b90d2a64500d38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d654e83e4274ecbf2a3334291efc777cd28d45c983ef4c927e1cf23e56c62189 +size 79257 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92837ae0891f3f7e89b9b74778c153ba42c13256 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ee52ecac49bf2a05d865cecbb05deffe2ee8446d95b5006e8eafd48dfe9400f +size 74182 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fa2219c120b30f6282d30ccafe70c3b0f08d396 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3604c86bb860d7fb8260e53e6d5a8122ed4ffa1c3501fea984fd11d8a4a26d4e +size 82457 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b50b9058664d4ad6651612f03a0a9283dfd0bc12 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b98509c8e4e5e863fbc60f95bfd3e15039e07fa4b589d0b099e20e7c7c86408 +size 89824 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ab5b1977d4dd2e0c6c528aad58cd375da2c00f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc231a1f40782bd6d8264637c841ed932913eb692829e55a0e9a52062466f6a +size 1003108 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4143a19c2031e50760527ad6d67a771bead4d43d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb378be0dd53b6a2043a28e1f728b2697dfaf16243f71c4eb7700e0dffb0b227 +size 132668 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..036fd4ae7c2180269cfb5b65943cdacac0b60543 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ad3f7b6d60219e339e1843a2798af0bc2531e2efcd6c9ea6d4e460c01d2997 +size 182659 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa2b527fa3d2a80ab14e2396c870c0a22b7e02f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9185bf4f94d05be97ff890c37ab796a2ed040bc631e6587883943ac0146c3bf7 +size 27486 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c88088ff752b287184dd719d8555439a7a5e083d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a96ffe5827d2a58fda1cf7ddd2fbab410dafef74658af9802e4a89c9743de8 +size 120241 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2adc0cdd5b729feb964de5a8d946311eb12021b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7a77fef430086cc7ca84c9ea3d33d75da9f00564c41f2412947caf7ea9b3f5 +size 57979 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d27f84d005211e0a68aa46f0ea37a48b15e3388e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521983c003ff71faaf3300c150a4fad9879b47fa47999fb3093208a836c869c0 +size 25381 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d4fa405516c516dced0c9bbbcb75c3f4c20ab2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:328c81a6703245033801c80f5c4b57486c534cf24a77be95ba7235c9b573314b +size 39361 diff --git a/eval-results/mmlu/0/ckpt_126/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_126/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e59347a869885e515b0d55862f5e18389571b15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8713ea156847842979e9cbc48bd7980bdcf48c25d9ed387050183365ae375982 +size 32976 diff --git a/eval-results/mmlu/0/ckpt_126/results.json.tar.gz b/eval-results/mmlu/0/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb93a25975d876a8c862cedca126febbed5d9338 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6fc63a05670e6803c2fed07c4a0ccb53ea79003c91c76f4d3701176ef9aa1a0 +size 7619 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..555ba346e44657ee3971ff936469d9811fe12cf1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b4f37b9d7b6de3c4f9420b6a9ae0fca8f57ab9c0b2f6bfd48543108bd758e2 +size 17002 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a67a160e5756cdd9bcc0c2f4dc75e2db4290af85 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89150e70078f84a597623644b398bd2d07af1c856b4a9209ce395a47a96b62af +size 29771 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3220ade673e84354907174abbb0feb595f5be9b2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b18e248141ca714ab9bae961eaa9b0a1c2e296daf3598c0a9ebd9479158c96 +size 39773 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12052e9f47cfa79f80a5dc3cedd802ea40562a0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c36ff8d795691402c73f8bfb70bc8715468718096f8b2fb330ce00ab00dc3d1 +size 26684 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57f3785934754e874803c8d7fb7803608999cc76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd90fa85c5df254dea1f279b8640752e9de2ee678b612547dfab6b3e911ee734 +size 61105 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72808e0f75a1063fee21705f950e13317c24fe64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3b18bc697304405f8e117bf544d709fb90535c55f8fffd593557ba5135d6c2 +size 40294 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f2babfc54343823ee5bff92dbb68c7d32325534 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e858b33c92ed550a9b7a83e8547e9199da9316c4809fc4a5607b4a137557fe74 +size 23739 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..814eb15cf3075cd3e41f5fc1912074344aa63d99 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669fc7fa80662333abeeac8b1ac1e63099dfccfaec62b4affbcabd0e27854237 +size 30996 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c1098b81308ca0bcf1a4fea75e4806ee876bc25 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6db734858849b4d11e5a501038cc12e199a25e864889d433117c34b9f2800b4b +size 22891 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9639ed4394160a1afaa39c40f19eea28ecb25585 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669287cc75e0b09d8c8c8853d539a7f25a16ad09c315adc81a612bf816458643 +size 60779 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7806ee70d08f091fa026692b894b9bc5541e4a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b0b4981dc0d691eb9e67c7df3d7dd85dafdf6217a4afbe34f25ce422f72352 +size 25608 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6b63f5297336b6da4cb785c007a6ef5b25cdfef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59e950820543c88ad979fc1e3da5a4083d76d3abdb5b6f68507c4137b3aa87b +size 25734 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6791569fb0da7907dcf69904561c82d61e7af851 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac25fd46f3db7d1eb7089d51085c49d68b44e6cd3851bc494d86254e04f000e8 +size 46389 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7098373e8397536c66cede02c8404670d2f51d9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6eafd4fe903ae181675c8d3154bd1c28908a8659358bb1cb8de0f31f7caa22e +size 31368 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35f41be081a19edfc6276ebb46a3746fcbf6d850 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73aa0f3bd77a30edfb7e4075bf077987f901a70dbec396724a1e67305ce51a94 +size 28696 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f31066c78971db281162bcde0a341626b94f9837 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56ec31dffee6d4a21d552b512daf8e352d504f614aa27c4263f1730ba6390388 +size 74177 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..584932360df7bc601aa966d0b72e545164d61650 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe5cb02571d0be22fc7bb2f0863c7496013c11175bb88e4c2b12aa6a71e945fe +size 29967 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7029fc512e22d9ceece0d8c4d67046c1bafb59b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96ce9006679473ba1e12e6ea7060e54b4ce98c8762094850bab40b5f71b916d +size 19071 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72a7ffbc0a53406e5ae553cb022d2a8fd20a354c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40904042658689f809b999e3a9885cae5e67c2039c426fac9ca394018ea3c1b5 +size 87778 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffa6c5616be6dee1c3ca752b2211200f347ca236 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69de3ebd4b323bc2b0ce014447325f3050fbbbb35548b19b5496ae5f9b1db86a +size 49807 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..491d5e69b37d314a98685a06037cab7be0b2af8b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d6e6391d81051d20f5e613fa812e12fbf5fae356c6f23096182b8858da57c63 +size 31531 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f29cb47d47045c55f508102102ef78d1b3d774a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aeef8265192198539641b4f0e436ce03333a67163b5cbdf33649960712943be +size 145174 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..715ee94567d9c83a2485eae7b0258b0fe258036a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:083d2a433149995e263cf285de2e9be6a8d292fa92690877845154f408e291f8 +size 44519 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c5e78686a6aa80d495858d2a1369cb8b76c906f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ba218f6d1b840d203f9c3dfa22b4b25af7795b325d88d8ab7f100a7ae8d13c +size 54403 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3093f74dc92e3e2bfd5a3e0bfbf631c1ec296faf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78cbbd04ca20286a4172b6ac0e39ae5131ee4ab7d00660e8ad770c4f21428e42 +size 92471 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..193886f373e39cd11575b5efd07e83ba83470203 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9293383fe8057d43b17cddb08036ea18ba1e7550ba7be18ec6fbe42e51f90abb +size 56899 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3a8d63156122774bc4ad3c12ac61a7cf755a20f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72291905b856f957b74b7e876874ff6190e4fe518112ace622b2be0386863783 +size 58310 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d38fd83d9eafd69507fbf2415b0bcfa67d00c938 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53a8b70c8e90a24ec82fd39e9776ebd260e4a55ff74e4fd50fc14b26796a755b +size 42695 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b0989b3959995517b94a25d0b2e9893dde7b4ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96cfd792bbc1684dc73adba11255c14d2989cba0cd0034490121c48ed3df239f +size 143844 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ccfb8b597139ea5ba401804c68fd559ef22ee41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad2cea33502da826101a367a6b1d9c986d832f5b26f18c46126b9f5115bb775f +size 70847 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1814a9bb5e42e1bfd1e0a815a0ae97f949ee45cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c785801f1e9b208125abef3b867b0c0b2329c14b9414c6e864e5c599763eef8 +size 161681 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33328bc336dc2847c7640326f1d2d3303ea17f70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9fd178c0dca3da2f8de9d206bf1959ca907fa10fab9a31ed2935c04a8b9cbe +size 209080 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f19aef56336fb7ebb94bd94d91dffa7ee0683852 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45043cb911033289e8e50e22073e50493631e8197d981bf7e6379e829038f7a9 +size 49795 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8858000081df03774311dbca2351e29bb3c8c5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a124af95f0c59b973387aab4370bae90452449481089aa7a9f820373800366 +size 31480 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35ebbd69c2c91592e8ca53f10a714cfd255f9092 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aadeebf2377f56364080a1d02bc1dc0cdc53a4416da6dd1b50b40ec96966f213 +size 35985 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d9f845cfa0f4612fd89dcd06eaf748bba89e0c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87c94891f6d5ce337d6b8639d26a643cf75ee9372213bd5e3337c4a2b9b20ada +size 29414 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5abdf9d0e1445b102c10af992fa171d6fab0f636 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9c1f4be3ec2692b88cb9499b2f577e58119e82238fe96a6704661976c959a80 +size 40262 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47367e78bf170818eaa6e36cdf7b451303fb8699 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9111962376c84ca46a5c62784d0c297da5b281f098ae8e816fcdc2a544630a +size 26942 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca950aad5293f2e2a620697adc6c71aa4c95b322 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9629e880fe10e096d9fe701820af39a76dad92884f7698d8daa8abfa1278d46 +size 21679 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..850db797246788970078b33edb22242d8341aead --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c235678765779d942864f79c2a36eccc88ccc4109d9a916c9cbb4fec99c362 +size 57426 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5c21e5129d893c915eac9f9f4bd7a478f621c6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40094f45f826d7f3b6dc29952d741908a0e396c7de0e3e7fa389a933393f9fde +size 22217 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..716f62f317bb5d0c634cd26eb0df87f7788e4420 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aadb83b17436086f8638aae5510d983eba52c882651922bcf92bd2ee4d714e59 +size 169155 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b5f7e4f921bc4e8ffe8f2df6942a6e988e46500 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df10ba0e0afc8a1b3da3e5dd37b0f80f885e7712c22ad051836727567582a1a2 +size 89953 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..607b9ca693c0d29d8c60f4d01fe52cd9c30a0557 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f60e62ef221a365fb7abaa5b09fb3b4bee0f740bc835428e2d6e9c5c9473eae6 +size 153663 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92f084a43fed52c93ea7c6af0e1762b6b6ddf440 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c70b9c4080841386cb54a994a6c0e9168102152e14751d7656a3615d44d22a9 +size 79360 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d09efdedf0ff3b493063a816f81050141dad88eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628e655e8260768e6179d38e1de6e6338a617b54b5ea8618c68a985730725244 +size 74200 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c5760a236b9b24e08f1135298ce665be63e6fc9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd8fcd7a120e87f531602221355a177742f6a3af3d366ad0614432ed6ad307b +size 82460 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..579b51f06bbbfe17b2b70f8db8413aa6e55ea617 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f0a8e6eb09796034632292a077075b3bc5b5ea3dcaff6162f3065ab98c4309 +size 89676 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..816b3a175d207e12b0b60a6324c472cc83f8efc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064eaabe5600369ce4ec30b8b99442049925d5fb22696229770ac3c94307f7ed +size 1002302 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74f8781d80bfe37498372a6e69ff81b9a5706dca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27dce244dc2682dc47ac0ab903057ac1a737b59a7bfc67613fdfedece34bd7b6 +size 132638 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..855ec3e656f7950903b0d7541755d3757e18540a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4dc1f316ac7a7d7c4e1a62a7508f6660e4a21b5f088ea0d639b954906f3887 +size 182729 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b12bd6e85d871fe9f0802c83bc066378e835166b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d649e13932fb25533d778b0768ad0d0a9508aff4ae79ca6e527f2a20f8c1d22 +size 27485 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75ec85069f0606ea11922d79cd5d3cdf8e3921d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce06c3b5f13485b3ebfde00d7ed1dd039d6975070e2c6f2b646fea4eb355957 +size 120145 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7433738fdc7d9ca42496ee12afde65abdab6599 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78604ca807580eba369cd57602fb5a41fa4cb4fe7032114b6e2544efffb8e570 +size 57911 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54d7e489617bc542252d53d875205e115fd519d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f597562e863c8ff41f5e7fc7425de9f90955ff942bfde6cd301da5b4a001b29 +size 25399 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04b48e89b337e61040a5cf24fa151ac5e5352f93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f054313a3114a4f4d9e6151143ea5c49d66b76f9a59e1694b3b481e1c8c1cb79 +size 39401 diff --git a/eval-results/mmlu/0/ckpt_129/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_129/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaca174f7ab138ff5ad50ec05b58d0823878bc22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0020f9a6c0d3b83f20436bdcb04d66b404f25b818e64ec3eb6ab2cf7cf4d426f +size 32987 diff --git a/eval-results/mmlu/0/ckpt_129/results.json.tar.gz b/eval-results/mmlu/0/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a1f5bb01671b69199dafdacba3a16f173bd68e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49afda750235dc6933f7a1226f8cfce8ac7fefb788abdc460f8e9ecc28084500 +size 7620 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ece839a9261a973662a890f3e6bc45a09053c84e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d3def75816487f63f2992fec2c4c8f96085b5ffddaa88b8f529e3712eb75ac +size 17049 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c95d6ecf3edc0867150bb44e4587a5156cdecf4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f0e81914992e584a37926156c40b1a68dce41eab9b2d2a27793d7915635668 +size 29789 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbd43b7f258456474d58f858707046dd444b1c84 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d344fa71cc6dfb9df75283d3e8bb05113e4e92410e7a8da6c369918f9c3fe8 +size 39821 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f835b152a90a4e63fbb9172481dcde64b09d4656 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e360f296080ba22d405e7ce75f7908d51151a93e0c82d2a27576a11cda50670b +size 26758 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0678d1b71874d704befd5469b9d3ada7ca4c714 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02bff1a1ed247d913d59c4d00f3d240be7e38f1780d1ccd2d838ee06bc5a2f73 +size 61172 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18ec88099e268e2cde1dc21e15191eabdf7ac304 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e79e38efce3e08bc77669ff98621b02fe863ef304b5b1fb50d3434919994e4b +size 40360 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..914950fe5b2471a32d5d304946a9577ee3d0d84c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e51c2e60fcedfa0949eecc2415b448873a5f8026312345d8e1ae45d9129d7f8 +size 23785 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b157e92d0830d7256efd8bfab3ee7c62c5942207 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dcaad108ceb9b46ad81f762042889674387ca631a96d2756e9edac38c624641 +size 31063 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a6dc59680aa39abefbc8fedb981710a3a8ecfa7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b5ce7cff4a0fa8a5e7715d4e7cad29c57b3f6eca7bb0969570f5d206ef2df4 +size 22880 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..153ee44f7ad6eaa1dc574bd6082604d75e472450 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0898efbc00ac61ce2548dec242b80efac92995bad743b59279146f4272a3e9c7 +size 60865 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..053e0d0b3252cc7229651559cb47b4ee407af27e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9aeaf4cfcbf4e2292f160ede9b5c7983f6d742fea94ebebb9c100a51cd835f +size 25683 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1c706ca280d9921891b6039dce15e5504a7bd79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7971fb8e44fe4a60a744dbf02f49eb0b5cb940fec4404575e1091d1ad4b478dd +size 25765 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f4a9d5499242d1bfe796d2277b591eb9ca16bdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055df7630c18a4cb017fbc58aa05d6694d9ccb3f5a9605298f128e0a6e0c6d61 +size 46430 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45af074b89aa6721d67da06875f7208249878459 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2891d2fca659c31d77191aa5fb087e870e3be623907494ec494277423b70115e +size 31405 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cce02204d20accff75d0e2fbd924d44447a64a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95d5e6960dba2062216c0ac0ee64137a4545603ae43f857658d2b3dfe4c7ccfb +size 28787 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ad0a7fd0b040fa1fba0b39b1d5633f0d0099472 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b1aaf4b738a9b2120759b5e481c4228bb24626c78985940319f54aaeed7f0e2 +size 74374 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbf9dd6c95a25f1ca3a4184377da529f4a703a43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a38b4af729a0efcfc7633323de313bbfc64eeb1e2592d315ff553e6bb1d39b +size 30058 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..374e92c98abd1918bd5fdebdf30f5afbe675b242 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b7b4451de69c69c00808da97fe820d16c199d28de770b574009bd6298dd2dfe +size 19087 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28b05df825283a0bf6f75a794349551319822ee6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b476102ac7dd4cd687eb424fb561ee9a868e2c5d0b448d1da192b281734e41 +size 87988 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fb55bb71457db0d74a30262c56f125cebac76c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4339435101c2c6ffcbd92f1ddf42cc158cc0dd447a06fde29d2f6d071b4660a6 +size 49951 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56b4aeedd00da191e8264d2874d7202d18f273c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d90220f512592f17a6162809851d27dc1691166244aaed28285d62fc5221ba +size 31567 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4d2a9d6755e264470a255ef2a114ada88582026 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b379aa55550c0fdb718cbd53b77f506dc907c6293030cf11833b239fdd45d8fe +size 145425 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef8ec2d3db29d08e083d476eb95e26f083828961 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4de14cdf1b3167748475d1674a6e4cbc8d8d75bbc6feb8655e96881efc0600fc +size 44553 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a7ef2544892259e881c259375dfef9a4f83dfac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08d33972b0b3da7a724dff23360810ba556ff66c115f949abf8187e6da63d68 +size 54425 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9210b8a65a2bf7d85201219d89bbf571fce954b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2a390f7e2542ece45b8756ff892ea686dd7f59fb85323b5665468bd13a9fb5 +size 92668 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcb80f9aae161ef96e63c11befe7152aad36fecc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5bd3cc24328326f1fce85a69f3839f051216bd28f93aa9faf1e68a660694ae7 +size 56936 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24c8cf6cc3b6e99dc8cb27b7e51883e9bd6c6832 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a5ba10ffb89283db759a383f54291bf8e86e2c2ed08e587c7b5bb4b667e96e +size 58396 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a22bdf0d72879c6a28c21f8e64f5e8864a1d069e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ebc9eed8d83c99e26927898edae7d3d216c42e0b7b388a3dc612a59e31cb6c1 +size 42773 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fc3bf2bc9ea02df24a8f42d681cd30c87972666 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd131bc5e6c5bfd62a38dd8bac5bd18f6ed5f22559a3fa947b7022c01e7a8d3 +size 143893 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b09561bbf8ea614648761a2eaa962e87ff966cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c2ebd0c69ccc4211bc747b1bca9a853e5625013a0fe80a29bcb49dc7f80a1c +size 71030 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68f187877f7af2a512a6f4cc461920f22f61638c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b3cb9d92574d6f81515ed9efebfa5af1d396875f72030ec3485ba6c91baead +size 161999 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..486c2f427d8056fcbea8e37ee7eb05454aae30f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c9ca4ccdea49c9d5649782447df7dee2d1aa82f8f1cbee74266c4555bbde88 +size 209377 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3ab6b8b316dbe69cf76b1b2b050f229a600455f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843300a6e103b64af5d30d00e259ffee8a206e34a887538e92aac140945e06ae +size 49858 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63b141666b4ac0821948e0d75e62a24427b70b43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96591d3a5f09a6e5ea0885421d9a5639432c1056e3377b945424a840bc081de0 +size 31474 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d01b2a578a3692b779d23327d64b7a02048f580e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b252b2d898f6c7063e15365cdaec7b401fb4de5167fe6f3e845a2709fb346bbe +size 36034 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1da317f819284d9516c4551336bd08982c1fb213 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70eb2e9015678904d10c627375c46d8ebbae5f562f770f90dbb79b4624178188 +size 29494 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82272960a899c44424cc68578bf2820dde0404aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30f83225a2d840ee8ce34950846811f901aa03de88a01edf76fb862cc2eb161a +size 40322 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7930093f764eb481768837de5b265e8d6b34f817 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb61fe9b8cc5aef5789460ca11873256eeefa0265d080f3ce08329b8e71d7286 +size 26953 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be3a8cdf5a60d8deb39a95adbf371672b09a6b4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a29b3e53ddb866470c01411ed80299cc06b1dab69f4a8cfb52722039a99bffe +size 21702 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ed32ec186ee849881240961a5416898a0f1411c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996d06ccc6322183fe7671dd84c2fe393f9988cee2f62a7c1238460a4a099110 +size 57420 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a331e956d487e26e838ede0358675af67ef4839 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b04b26c19428c02fab0bd021153bdf18ee3cd6069a91803293fce0ea7aee5690 +size 22257 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84d83e898e8901892461ce51ca1e10eafff3d569 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16fbb97e70f284736cee1001d46113994347fd237bf21a2846b1cf981f27bfc9 +size 169213 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37e437aac5c6889ead109616ddf82bdd8acd653c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ea13b2faa527d3d233c011832ffc4593918096ecaf5b8434d0b090e93a60f8 +size 90124 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3aa9844637a98c9a4eb626e151bee3442d6053c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad09e8b27397aa196b670816551bc47a2c27b0bcfccdee2e75be492fae6b8a52 +size 153803 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f51914e4c1e1d8076f5e935edb7645aa9cde5274 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a0fda26d96e490fcd927e6ff1998b06406c4e58cea2daf9bae34b003460baf +size 79397 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d69d85665948d4c41f136352d79c1cf6bc93a6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fceaca04cf487421cfa6c47e9f1fd3c9ff361eeee41a1d85e6861f3df4b89a4 +size 74278 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a4a1e2b1f0fbd7534bd6a031671d7b41212af6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea3929e496c1b69a0752b0b03ddb2169d5f0963cd7675eca0f131f37b63c2e9e +size 82568 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcd55e10f8fb669a3c8f184031e1153bf130f11e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e76152b1a18d50c8f20fa24b43e1e2835bd828f2c7d493621b4699c2ba4b54e4 +size 89807 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..792adf77a19203d0b051b64821fdba965cc4f726 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af3aa7d3c0345f966adc04a9a9046642699d7c39463324336144bb91b15bdf3 +size 1003672 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f3959b3e5740982b7cd29facb06bcfe8a2353fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6553dcdc960abf559b097d40c451f232bd32f0d95e6bb875b25efe6b282536a6 +size 132848 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15cf23f3eb7095fcfe224566f250acf66669f26a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5cacef76892be17d2e2025329010fc9944d5292e42bcf61f8c0266a271095dc +size 182913 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65243df6b62d1808c687da3e8d715515ba64c649 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40734b6be140aebd08c5433f95724bf058a6c70ad4dd2462ad2837c42bad07aa +size 27530 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15b978938629152143d17b7eaa25ba7e051c8ab3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:130f25259911adb104d999c4c53c8698e400f5464b5bdbbbc278a3c5ca520639 +size 120247 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a3b941dc5b468cc451075ee0d94ebf122c051e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d00a7b86087e7ad6be5649bfca9c8b8fa0d619077eb4067df8382b2e74445d +size 57999 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a2b4bbf15f828b8065dc3f51b79d7d1c0041dba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf5f9c17339470c4cc481eb4bc05eda6314b22fdc20dcb3eccd4244962b7bae +size 25447 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be469be7e2ee98e419cd66983f94cd7f7664b0d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae8f9056d02cd39046e2e779775ec9d27313f756b11d634de7658b976d04e5a +size 39425 diff --git a/eval-results/mmlu/0/ckpt_132/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_132/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3269bfa937c0e67aa940c8e068c3aac67550efe1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88567e069dd6bf7c3a02fde6cf4ef2b9b1934a77b83d1879724bd8c899f2d77b +size 32950 diff --git a/eval-results/mmlu/0/ckpt_132/results.json.tar.gz b/eval-results/mmlu/0/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f01b90af9943363af7b9f01f213b22d565880634 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec262677bbf4f772733a3be508d33ac44a70456f4c36b754202f61116068963a +size 7610 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb35433978c5b3bb6b673d617824d72fafffaf51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b0686d9b9151b80f7da1b00a36924903873bdc42ee55ad3523d8fc8766fc5a4 +size 17030 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6d4b4e9d7efaa52f6f97098fe7a700894a78c47 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb200be7202fe79dc8585cab98b0af102f530232b88b018de3ff8fa7c13e4673 +size 29773 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c469f418e8864ebb2ec0b2514a5cfffed2245c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42457354b935a95b47d89828bfcb894ce8c1ca2831160074c4ab7073692e6ead +size 39800 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0b2975fc067e32a82e0fef4782b8841d87d1db4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bca00883e1448e0f37e9ee9f32943daa97ca6a57d12d2456673a57320185e42 +size 26738 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abe27da33ebf6dc708a0a52a87d6cf06a0d7bd1b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba42a53ff12b314e90246a80c56287bf691eb8f6c24a76050943fe3e3901331 +size 61139 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5c8c72ebacaa9e220ac2366d69e3648b3925fb2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da96586870b1f067d483329078ebe248e0ea639a17be5e09eecfc8e5944a2f65 +size 40312 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abc98ab9342eb68ab8388656eb71bd4a1aedf768 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e32ac61abf942ff3f970c8f8dc428012db6c9aaa1924ae87101a3863255787db +size 23730 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd83c034be2bdca9c89076818991746ce7423ce9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88e884ac0b26d33f477b54e1788337fef17f98391d43c26593c0615c3e36350 +size 31056 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7636c35f10f96b84d2fc981478f55af30658d4ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab1076f9d048392ce2f6aebf6429c512a29155faa94f1dbaef46ade30e9dc85 +size 22871 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..755973cbd0c46cf9cac7947022b5e171abf2fbdf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f87adb6a1ae83b7c8ef4b9a869f56440bca6ffe1aed814e68c453bdadafa5eb5 +size 60809 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e21a6909fe32a457e94cb9a541ae4686b038ebd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c156e3c99b19eba4454719f2cf8991d8a602d544f1261608a205ebb4dcfaaaa +size 25673 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15abfbe300465298204c4883d6d0971952ea3523 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b759179068f1d015920b1532591be56ec98f205ed3135873220fcdc536afdf18 +size 25765 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ed6984625b300f98ec41b2ad8af3d2e9eb533c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2efb6ac9d545f5bfce64fac260ce90ffa28f5caf7be15789ebe2e6cb0e5e875 +size 46407 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2025229b0edef7c8f3524aa38ad32121e867f9b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96acb42aac388994d6950087644f9cb71d54dcecb340fb11189cd799dd3b4436 +size 31431 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c91b79ab774925aba2b533d4ed6b334ec61e5768 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c2e527c1d21d7ff6b861663fe93cdaccef8c4e00f06a27b242dddd648ab009 +size 28734 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24d74bc2f6983fd7797076a7ac4428d0473a10b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b1e7db76c5830d751ac71ee18bb46f35f6182c6bffbe1370da7a72e633639f6 +size 74443 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a674971f42bbd32fb51e99a3f3c364e13d538d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb506513d53fb10f14b6312e53ed33234f6619e6b9abcd197e420a77fcb4e53 +size 30037 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..823e8c7c36bff2611871763ba8258fb217599c27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d962ba38dd7d17b9f4a0e2c90d88b4153fc54c8de300d84a7e82ba4d770610 +size 19090 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd16c0185fc2ea4bc420aadec036320975529efd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01448cc6791ccaf1de222773c8b033cd7b397b944abc32d2883e158f129ce239 +size 87923 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a168e98252d9d025481d201337887d682868aabf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b716b5e1891acf1ffec8cd9a9dbba181d60ae195b5c1653ef5748c97683e571 +size 49826 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e4ee2a7afe415488b83a9dfe6e708e9906ccd3d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2714033a8b001eb2f6c98d118461e3a445ab5d70bb62feb3659cca2a4370bfd3 +size 31522 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aebabb1ccffc4287bf38810ce0f9c15da7daf3be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919e0a7f374e4f24bb23257f3f05b60f98e959003ce2bebe3e6f1bf17124e860 +size 145301 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37160d45ccfac5200d1621a0f43e426f9faf3b87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db12c03c73b5bf277a09eb3c406a048d41d4310498fa159e82a3094ba89a423 +size 44541 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2914337ff529a2ba811425bb49eb7500ff954e76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a236a1311c0ab42c5b4c7b2764facaaf33d154d569b6cc6ece9a8bc12b49421f +size 54363 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53ff50756ed68f4da321f458b4c647b9d2844e95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:319dfd6eed61e6c0d7b16e2a9a0fd48ad313919a6599e11cd14ca14dc1e15d71 +size 92540 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fbe114af5162be4499673780346fca2e5545b9d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79dd1732b65ddff4b37a8dd37ec0c697d7984d44022c98e9d007c48b71c0a0d +size 56996 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1d3a35788ba558217c71ddd8de4a1be7153769f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20726a68828cf3ad30e4827c42674fdc2cd98c4073ab8296f75e8b709f0201e +size 58344 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c701c69edec3349f279ec8e982fbc4a7294df50a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b98bef018442fc06cee6daeb30557fe626ada1c01568b0b1550ed18200d5445 +size 42800 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1daabb5287e99fbb8ea05a61e03412468a4ce28d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd8d7862baccfd4961b0c62519ac5f60f57c5f6b56b3919f086eb573863ab751 +size 143802 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45344d732032c55394206e70879ce2058a05ec83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090dca44da1eb822c9d93e78ccc5f6d43819551d87802210e9e87efd00f6cd1e +size 71014 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79d072301dbe6bff77c8269ac88ce71251681b56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:223c39d37243fa89b3f62f1b89894aac0e604a196b1f686ab4ccad53ec60c0fa +size 161837 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..191f9e9c5316f8d997fe0c3485d40f88e288c005 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb1ff3d98a5d447de367c04ca029f7df8f686837b43fcab7e8379874d179f5a +size 209280 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cded26cf6443db68d01494de9b6f24356d160e3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476a392b1f83876d68378b3f13f5b0789daee958986e02e37c5bb355cee6f18c +size 49812 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29006a9f06923e3b2fa61eaacae6984f08e469bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f8ca45bad631d99925df99225dc5e26d7f858ec8ea3e61f085536f2803dc8d +size 31478 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b30df9790838f6379e884e416574af713ded9ae1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19d0bc6f34be5c6ff785744ecb22094b6ea2c96730e5b3790a46753b4d50b210 +size 35999 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1f429f7b1988a35a3b260214618d1713bb8deb8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b9cb799565fa456ddd27721f6c7b8db88ae763667cf4a110792df62c3dd442f +size 29412 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0aede8828409544b568a292fb8ae0f7bd85b9cfc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4f848e41a8f6b262ddbae86bb5e76d6f8e967c4f5b338208feb4b984d1a865 +size 40252 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..368edb1f09764ad9254277eb3e1a2c7b12e91bde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:705f3583f531eb9e1c6a07ce249e9e7caf25b8ebec7e80fe62a76f43f6e5720c +size 27002 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41c9e624c49d444e2eaeafb5a28f8319928ba564 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e368e62ff1e07fc24507975f043b246b108639d3d21436c218d5a8442ed7ce9 +size 21689 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36cfd346066859b7870b9858833fda11e2f9cbd8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df324bb1fb133a37244ffeccda16a2eb1313d9238ea9a09fb3144238491a82c8 +size 57411 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ac523a6a3ef3f599b0959fed36b224f6b9e635d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399dff293a0b0a2105e70951c8854f891576f52a0fc0b3a7dbce8c376d0db8f7 +size 22259 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43ceca0e117340a89969e1eb69f5f693ecff2215 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e026288a60b2ef39421faca8af78265593dd6ab850963902ff07f7a21855ca5 +size 169231 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a59829f03baf30a7782a150d0d58e3b7da14eb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7afc0d0c520f168abaf59821a6c376542c3c9fddc05705c212b030b4f24d1e3 +size 90011 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a76dd7a7e0dd2f69a4a60779413500617548a2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696aa42fbf01595aa089ae1a448cebf08480d13b26619574475f1c2ccfab3ca7 +size 154008 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21cfc6b1604ea590e655641dae1227b9c67c515d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531265045a6644e19ad7194be300ff7033593b48df5a9134f6aa2400e208f59b +size 79280 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac0d469da916c1968410986fb05db01ed0eef07e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9fba2e80ff2ee05f673bc4b5d7f04ed8987a4cdd2b2848c9c267549ef9de9e +size 74300 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f547665a0ae6e0bf6e66b9300838233156b1f548 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8be3d97d6fe12c6f2fa91e654c8943c90a37d6e1e983434ae513da6b522324d +size 82493 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30380a810f8298fbd02b331a5614849e5052460a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b9c99d2a45711f4d4123bddbeed865edae3ff9a9de66c8aba71c55000ab9bd +size 89793 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c93eaa68f27573f77c3639fae3c535eaf19a4456 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1e945b830e24a9e18d16061a674cb4e90efbbd5e38920ebf7edfc715537a84 +size 1003306 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f5dee7903247025f427d56ca175634833b462e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b28eeca2ccc0e091d82ffca278c834eb4ba2a9759714f6bdf6b5976b26bd13 +size 132835 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2322702f349a7108849f9b0592ad9e1c23ddce37 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091828b6b5767b5218ca47ea9634651f4f590ff5bd9a8a2b7ee6978df1ad6a39 +size 182783 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b46b46fd6af6ee901edc47eb7766ed85af5172db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a496e6a855213128a3b3cea909e1815b3bf96399905685ead258405ed6a1e6 +size 27542 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..210f2f723b85f52fa7611291d4e015ecd6ab7957 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f0842fda7b048ca000f50852f3ecc2fe90939e762aa4fb9485af8ef95246fb +size 120285 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e591ce9b149715bd8902656d0ccc0b8d64584f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e94dcdfaf86f3bd2c2a8ad975b1b8fd9a8b261986071b92c967d553f839de7b3 +size 58021 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f09330498ef0a3e568eb8b2f0496c039a27489b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e7a3b3e1c0bef4bfb96be465cbd7166858c9b5461f162ada57585d27fe797c +size 25415 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a96d2820d6ef03ac351501b1d365b67f783ec373 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00fb6f112d5b2c8877f79f80fe30b5841aed7be50e5bd1d91202bc0aa34c00f +size 39464 diff --git a/eval-results/mmlu/0/ckpt_135/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_135/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3da06746c2f9becf23be05f33ad00c569deb86c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82db000ef1510d501bb0a55ea7b09e7f6dfe08cd0ff92a98cc216ac9b4373bdb +size 32966 diff --git a/eval-results/mmlu/0/ckpt_135/results.json.tar.gz b/eval-results/mmlu/0/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c3f86e3a86ee1a60be36d404f3f8c4e1d0f5dbf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6dba4f3e8b07dded6c8ff1216a65cf324881af06365c0a53238d84dc7868fdd +size 7595 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..669f9e221a0a6c8efaf415242bac39030a6e413a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a3bc96cf797082ecabac1256c7b8b566ae3c314b314303f85fdbf670053915 +size 17050 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1b078d2876fa58a4d228a2a227c91f8ecabcce9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b7c22bc4cfd072a0774f99cc3f833a4d18707901f1aaa790cac4771170056b +size 29806 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5f4b44addb03ee96d2db9533857f3e0a47eb5c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e1bf04b1dcaaf1ad941580d11a4c4fc4acdb6f9331cb97a70eb1d37d24e837 +size 39823 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0655d5713539a1f7c3143dea27e6e5cf06ce98d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4302edab2bd5de6ec2cc790bd97929b4d833ff3a7ece89de79588d060a1f425 +size 26709 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efcdad3dab2add1ca04bc136b866d890ef9cb78b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19dcddb4ab73ee2db159e8d24795d9974348403952434fb8ee0629f01cb52290 +size 61231 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f6703f60dedb6336079b5297259ed0aab28953a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327cc852a60094e1a9e005cb36bf2754b4ea28800657001b1c26cc4e25bc00dd +size 40352 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61090366f4fb8a4b498c3034d8abf96c258266b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd09b626f5a1e76a9553d98f2956a37a0b9f2a87cac11a800517e61f438e06b +size 23786 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bf4c1c4a94352e1bf5475024bd67f742f921d08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f642bb140dea0f1c8f21c68ac71b848f8b033b76965bf617b450ab124d98fd92 +size 31072 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f470ff42d09d4853d11920eede78cfce27bef4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a9a80ead77b924b6aad42aaaaaed3579b5eec0e1593a274d44533501c92c7a +size 22917 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e73d5c561af59f92c129b2820df27ff4e49574f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8df505665d46c4b8c40e1a84a9aa05e13e979689f7b8ffab214b806b45305ecb +size 60859 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93ac57480e4043609c0e534648ae60dd0f3233c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b0ecae23c04cc402995d3e5b34e14fd9bc5d2b30a70ad1ccc2b934ab174cc3 +size 25691 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cd426aff239efd10bb81029c4b9e76528c2ce6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6480abce41e445a4f365b3841dcd16df8a5e554d6567268eded7cc4ef953a63d +size 25779 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d533fbaf349839c1dacf4c92fea99147f279d3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d107c3ea22d7eb581c5c46f4a95830ac01d07751d120b6d83bc9437afa9b4b +size 46471 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ddfce099e524c831d15d2de5395b57b4452166b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7183438fce1e1818680ba64dc784b3cf347fbfe75f5525e36bda4a2635c44084 +size 31450 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21e949e8e2b18b7df9a2bd35af4735287955bc1d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39f6f76b99f2318705a235a849ca9046c4b9bbf6778a7de759d6358e27233d5 +size 28771 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c67085a96687da45f391dc436b24e690bdc48037 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70914c5717ed38e6d8680b35016e5687aed8e5c6eddbf0e8726214cf05c4f273 +size 74508 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..521060497a49e0850783fc31d7748191e6bb91fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71721f0421b3cd35279d4f649bfbe8017bf9048e2e94949e40c1fd871c508b79 +size 30124 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88ca548e5953b9039b42996c45a7b041b4bde723 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9ac9a18cd56dca3b061028edf182ee5839b51af01a61630be67fb12a4573db3 +size 19114 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..837727204fa505cd532cc4741010a3d78f626df4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09bb7c1f660de2aca2bfeabb0e7e369143cdd813b2e3e31d573eebb52d39102c +size 87914 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e272fee85dadc3c5a5a5a958560d0351b2cdebe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a161c82cdbaab76d482d48039595e05395d8f8cfb0967440cf4ca2845c989d0 +size 49896 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9899da9b00507ed8954965ac38db1240c5d4c792 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db34be0d02336c87f388c5fac30114d48498f3b8524a2ec30211ec235c60aa9 +size 31583 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..687519a9be80309d957834f5f4413910978a1300 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9694281030c2d742ee590c7f9a5b6bc0e6a7a7e61410ddb2e41341eec242c45a +size 145378 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f85f26c53fdaf1d5ed19c2875d8334b475ef065 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a844b8536bcf62b770cf1728651c28a98a33f1a2487ebbacb028a7dbda0aa30f +size 44529 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..116f499cea7cfdbd08b9093ca14bea587a2e3f65 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cb9a250f2d9feb54e168c8ef2c2136fbfc7b8ba8cb096c35f4e475d83331008 +size 54425 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a56664547d30ecb73542632ee21a3b6db749e3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5a95e27212b33cfc0a90027015d6299e4223d063953edfd82d51c354f649c2 +size 92679 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..259749217bcbd9316eba55f35b9ada15d3fa4ec9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cdfece0b2718b43c945f5f3cdaf915d6d29d361dad41955a583e15fc319d10e +size 57068 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8502c4022f4b764ee1e4c009c1086116365186b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f91a463d1e2166324f54a0a7c083d9fec92be8b9cdb062bba6283e113dfd283 +size 58399 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..194d8da401b36239d0f2f936ccc9b032df4c4fcc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e81ec6f0781a647f4d20f669ea936a16e9d9305a65150a2f595518890833591 +size 42819 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a56f6d7ed7559dfe679fc3c8663745c6d96296d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2930300b8a55508b3a9915f2ede6e80d7491d6a52dddf03df6b7a75c8aa938e4 +size 143902 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f67c0ec0b5a94ed76dee352a40eca3cc3acc8a91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b25f9007b47b867c632e6d5e3eb9b1ea842ab5ec701c04a8a588f2ab8c8d15f +size 71099 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d1ccecb3738db22b61e962b96de7e583109dec9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bb8da3136647f9164bc9c5e500a8fa13f78385be568f5479b434daf49b38464 +size 161971 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00a7774ade9867532891ff901eb23ea47fd55a58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d88adfb3017f6aa47a84eb8298ff66012e5c8d6ddc4e9948ee8d9754447ab26 +size 209348 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f040c3261e1cbf5b65a853911263774586f4f89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628f07ce8363499b075dfb5c2a2338907c12ea01bec54f2ac651d6787a685d14 +size 49842 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37c97d7137ef0ee794af15b26dd1fa1a52a82c1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f9e1130064ad40e65876cc31acfd4296ceab35f31087b661ee90265a372ebd +size 31498 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3455ce5744344f942a150ea598942aa546b565b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f118fa3bd171b1a5421c0b7ade327da1e71b6f8bf56829b846e979e2df22f9fe +size 36056 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ef11fb6d591a7d6ada9f8bf4daa99f0ed9832ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521b9d824d875631a6ac6aa5b9c88262d82ad4a6957f62c22e7d67ebdd7da8a2 +size 29476 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b7f4999d4ea7a39dd83a0f70b488fd14a2f5293 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40b1bd69befed4f7fbf6c6bcfccd41ec488dec39fffe8191f398e12c8026ea0 +size 40306 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb6fcc6152c15025d0679e769eeacc5bf7623917 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a13558bc504e7fda9927a13b4c607c151b35cea654d66428def7873f68a84e +size 26997 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bac411a6c70efb69c883817aacb246bcfe37f84b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5fb83ad82149eb81c56e7139abb478e834ce2a2af44e440ae7791bf78eb6f6a +size 21728 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48c9a1dededb033d840f4aa5cfaa9b14b54e5cdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b28b6846c562df076dd70aff9a9aa86c9e7c8417673b2970fb2b1146e4e653 +size 57414 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12bc05aea648e2986399c0e25216b1886d0e0451 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56bf5e078a237681b0abe47382728ba4dc0f8a764a93f17c006265363e584ec3 +size 22276 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..499ecf6a893268fb01d049556c44d69987a1f00b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f9b75aa2506ba38b0ba0a751cf22426c1ab17cb82126cc659854337b14057d0 +size 169237 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3872589af49c0a6d49dfe89ce1f0ce8742746940 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd59391d591cbd4f71fc1454374c25e6d4cd2ab535ab28f38a0572be08ab725 +size 90060 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..614fbee37174624e7a9b051adc7a770b62433a70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5add162c144071e86da89b734e786ffbdaf26b283d1a05f2b935b2e6d2d3a49c +size 153273 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aff435b57648e8eb67caec3382ff8657196a85e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f407d51062a449dc3b4e718d981b053008f97612a06559fd84785dd5a63c358a +size 79371 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40d1296e977fafb438581d35d61667b94725fa20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e81b8bcf9371b9a051cd180a8c565015e4b31709b45193b5cbc09d87c032cf4 +size 74291 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e51053e67644ad4111971238a14f042a144a2d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a737726db2f968addbcde029c39845d7585e3d1b940a1bf7c96f01b234ef4c12 +size 82554 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..055b18a5a1f868e4ce6866e69b43bead4dbd345f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c8b6e4fa930195682ed879c4c4daf6bd0bdda2010a5c53e6c9a384f16a79fa +size 89832 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..317e56462453485366f6fa17e047ada0470af66f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa19edd20ecd18c0dce242f2ade3f68b42e309d41d584211ab61cd4f7449828b +size 1003770 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01d7587f289686f8577040eb8bbbfe6fb2fe2257 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6606f5d1f1dc356c5655819d44ec8735fa9640d5301243f8481a5c89243486 +size 132906 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05a6c66b966ef528f3847f1eae529b15ec2f4d02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b333e7804b17d3ac18b191dc7d36306d3835142e51a9e45309f1bc13783b3084 +size 182856 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e28a81755cf14411680c069e846fadec8838e8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228d0b4f6a94cffecbd4288925478ab7e4022506bb2b30b6423b108d5d614b4b +size 27543 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e74e68c47d72ed6ec6bb7fa2341a00a3cb159423 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354d762e12afe1cbbfe0d92447a64c5a14d8c2bb31c20219431c23ee60862f26 +size 120345 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e46d2ea41fada2785cbc20cb80632cc8cb1fa728 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d2a6edf3280dae7a31dee6b7c9a692662a1b2c6824d997fa3d060a1ba00f516 +size 57987 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d85dc47f1c2ba49391f57213048c1af137feffd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06dd18187e90a1e79e40b1ddd42d30ab658003be794c2e1867e8e881b40fba6b +size 25430 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..faf06d2515aaf470b06438fd75d6a9f7a9a6b33c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03c71a6fc20e33a02c987f0e3a425041eac13d69b86d8094f3fe98a9bd06abbe +size 39425 diff --git a/eval-results/mmlu/0/ckpt_138/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_138/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34e921f997c97cfaa98b95790640dae5531110d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2b043378a3001fb9bcc2eccd115aecfcbde19fa86b14b1264ec4baf2b5d0e4 +size 32954 diff --git a/eval-results/mmlu/0/ckpt_138/results.json.tar.gz b/eval-results/mmlu/0/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..547a7e955774fa29e600fa36de84b6a35c5aa29d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e84b11f60e5a5556d78efb0a27cfac7cfc7db37c8ba5274624b429f8896e3241 +size 7656 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b4a000f9b5cb9097c28b55a5bed55da2ec27892 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:483af8c26f3148970802c27a3c920f444476e4a389f60c026134a6c34e845c20 +size 16987 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb4fa00ca3718e740f7cbe17be36356ae4f983ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707e9bec97907258e96a869d94a41a98b3accb4acfeb8f30399f023a0056356f +size 29818 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a699bc2b88f951ef7cef52cf87b4de0a3f359eec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb3c5a61989d2b78130c64cc59e1ef127ee872f74ffc64777a2f1c905c8c04b +size 39771 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1580e59a0459fd25ce95b636c456d09bb5a5de74 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2354800c4011e45cb905add3e6eb6b905c7c4ee2aa9fd6a09a15da65f4ff3b0d +size 26732 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ba292eacc0233a19f87924ed3491ae01a7c8733 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2932243eb1e8f78e22685eae90691276710f84fbceb0aa37cf9904a5e5769411 +size 61178 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9808ee429e2a25ce38180f8d0784bbd28beb1189 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d055fb7d741679044dc641195c466276faec88f6949c15bb3063688e227265f +size 40296 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..635819b3c9ef70cab5d23a04dac159a9a5c1227e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2acbf9a0b2add577e80295788726ee3dcc45214f44daa354001465365941f535 +size 23707 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd2292aa75678a9e0214c0bc401b94768d474255 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6616ee363b2d97649e98468c0e803e9517b5d8d308b124dbff4dfda44903dfc7 +size 30990 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..368329aeb794dcedbe7332a578d44e0c28078c6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f356c1a3ff63c1dae36200c43a96c21881d805fab8d9005738e8a86cd5e8b1a +size 22875 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f6af0fcebe8c473298b18965443da553b44b1d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:085e8346994dfc7c0a4f041e26ef3b78d7678fd8e784b8441c8d9e42885e5e74 +size 60814 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12a48bcb8a5586203b0569aa24c1cb4d6022c6ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:731f998d29f21d5eabcf7bb100939bf2e6a8420ba483140da265d3a6a13420ad +size 25683 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b73731cbe7d1d941da52cb58f93c21b59337666 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c93bddd473c91204cc7b19224654384ebef88a4b21b40f77954598f2d8c4118c +size 25763 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e0697ad81a0a8d5fba55209bd9da1393ebfe18a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202fde3a49ae793444df5497c62bd7d95f6451bbb3cf06f751b702375fd836d1 +size 46422 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6647e4baf4fb3c40c548e7f7811d3ad0b01e0857 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b40152bd2abccd55e2bc61c5db445ab5c5e43668e17778b9bb4767571ce6abd +size 31408 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffa1431c941b207b6511cd73521e237700d7353e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b98b0637a076045f5c50d5fa2a5979dda999522e066c84fac8c91956f880b62e +size 28752 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3949509c78bfb0ad1bbfcb6d08f630c58f8f4ba3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ba6c975e6d6a02a6974faebf62a84459c56cbcd05d438b0639bf687299aa57 +size 74403 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4499c79799abcd640a537ad159ecc0b5315b9f0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0f5898733d166b67662a89c5dd84dac47405716ce68665dbc97f902f248b19 +size 30037 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d52bf5abb880eb4c45e9e8b35cd3becedab8f14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7371501f546a72eed4219d7630b7d9e034d401f95c8c9a1a9a8ebd5f654e07ed +size 19086 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09f1c934634bc6fc2ada1e86595cf63387e6010b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7ecaa486d331fb1c5fbce73a01d7aa7eb89d15333d6426d0534beb1600009fb +size 87875 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3cfc0522cf8beeb5807d30b3768ec314510f5a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa61b66b2801549bf1a0363209c76db2168daf02a9a2d98be857ef58d036a406 +size 49851 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a477594d6dc82972778bf0768d5d6a84939c769a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6da388ea3c14fac47fbbd57e2aafbdffdb557b90fc0d18fb2296222f39ebacc3 +size 31541 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95fd58c878cefd8e24a40ccb73cded9d9ce812a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd88acf0745977aaab968f567c172a0baad2bfe9d8163fe9cf26d717ec471c7 +size 145258 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..446e5e36e1d5ef1e9c9e1bd3d4e5ad2c1e2578f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448d9a1f334c53f9d8c8a851beeb033940ae392f0f29facf3df7075c0f30fd60 +size 44525 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..517de03f2ecf8243aab50cbd6bb6b47417015bf1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20a021be7866e3d4d0a49217511e3ebbf660cd5cd954a41dc81c7cb8c355bee +size 54389 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f40c12e8b19a1ff744c22b240b0a27c0fa1be984 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11fe316e7d1d5f5d1affe5708dca4fe24a2cae1e484b22aeb14f89453b35abc +size 92557 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e46e310833ca395db26d64bd38360638ce9ec3bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af5440d94de4772123b502f2aee2a4d13cbfea663218027405b886c270eb178 +size 57031 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d4ba8273020033ae001d2a30089f405713193a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3344e791b6f365bcc7c1001dc57f5245a5a601a3b2803c6181555127e26bd25a +size 58377 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55bbb6e445d2c7eccd9a0c1c9f5c504c8605df7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f9d59d0529be927ca6dad9c849513edf8fc03927b65a77257d767bdbf98047 +size 42801 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..749ee310278a606ee9bd8f457e8fbbb6e76a45d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e385dfb0dc2815c17da350f79bae9ec8fea1f064e0febcef6cedfdabfdf6a2 +size 143856 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a960dafc09aa65a2d4f043d2ba0cc86fc37863ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79ce11531c0b0977a858fde0c10141c1b4d707d1814c68ce92c0c973827b1d8 +size 71015 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1a639fffb344ac977634bb02f4f6c2f6f931c37 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3263d64a63653e8ec05738005ef64a5090c6c53f7ee7cf4016234eb819f4db +size 161875 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49870a350c2bd7e4f45c328473482f6d989dd604 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c05ad01984d56cbd760314c69dadb7c398964c23d0756ba20e424adb7c564ac +size 209167 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96dedb1c57e7b81474b8610a92d82deba3edefa4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3b7ab7707235b95d0c32f2744a63968cae5d2531fda37dd3112bad7f7c2134 +size 49791 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c38b5a13e2aff6a5d2a84eefa809dd8b9091ff2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6996e0a18c094bf8b9c239cbb89400abfdb716c11eb67281d574c7f7afdd8b7 +size 31456 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7a971d311b4bbc437e57aa2030520dad220e223 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cf5ea5a0809e62411d6c3bb212ce623973edae2c0b71f6f5d3037790def580 +size 35984 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd8a76229c12283a4c823ecd1a86ed21340d8dc2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f184ce8679272222cd12185b8014ce9693d17dfdf019ad78ec0572194d1db7 +size 29478 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49275f71dd143cc412b1912996a0be2d0a959f47 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b12ff17c275c2303c106bc2fb93b87fefcef4db0e0ba674365cbd263b5697c +size 40280 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..760c87ffa6ba1928971d92b4d131fadf72f1814c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7de4c2e8a58b4b4ce83fd1e17d18e3f015bcb3f5afd41d0f603fa1b13029b6 +size 26979 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..119acb6b726febe42e12a821287559cab2552116 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb9509ddaadef756bf4ebcd02e7961071ea8735ca3d2448dc7883a0ff1a7f49 +size 21710 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98964fd01ab3544a245ad1a1b7930e3bf5dd8ab7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d68e1a45efbbfd8df14a81b5c3957891e1d020bd38a8d0b78a84879c15c90e +size 57435 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d3f58d7020598356d36d3e3da214a8c27ac3588 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea24aa17e904bcfb17ea8419ba20e63728bd7c357dcc5f43ef4209454b5e6ce0 +size 22234 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38b3d99a2296ecca399bb05c8e21baf2f934dc9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d862a4e5ac7a10bc376f413e2373f6d914f0dccea2c6cefc3830bb8e1b544c +size 169212 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f54fa0ebe6bff9d5063bfa3896f874f07a978d62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348e8516563bc29a4ef1ac31883d53dac02550d8a86e3a4fc1485f3adcbf5bdc +size 90066 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7c9d278988d10b183056df50085975d7f2dc0ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f16e08ef728b26c3d869727e1b8cb93a25c5697774680a87842d206b6cbbb011 +size 153413 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab43e7952415387788baefe1a0f81cb83031631b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0617b34cedf2d3687a37baae3550583d2e61224896bdb51e765f2aab4f24be4a +size 79296 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85701a4266f33f6de70c8b299a740cd7102bf27a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f513f03668a363f6796a5e863d8d988c86eb9dc98d41a66fedd649a24df12ad +size 74220 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66146337acea721473a5e4a2c14624f19bf4a7f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e23d246d7cdede975ca310e6707e68988a6dae374744f8f8a0643ec0fbdc18f +size 82445 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f71976d5fdf85a64b92831d90c516813002f4521 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481a73718943341439e237fb088e8db34a2d967b7df0119b140390702937ab91 +size 89819 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..942ad9e762363f8a2c5da0bb14e42727614800e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:276be03bba325dc1c5159e8d9fc51189bdfccd5b5cf392cc0de4c8946a5656c9 +size 1003484 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6403b4deb67804124dc6b149494c87c1c849cc7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59c124a4047627be1d67976ede9863c769c0a3a1481d52d098fb1784094f8946 +size 132715 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a87ee923993083e38b663447b8bdf9b172c181e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251fabcb06c8e5dffbb5ba16c860b0e6e72f97945c34c476d7323d074b700880 +size 182715 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5cf45817783adf1c901413b5ec7b6be28b158ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1370aab09bb8707562916b118e8aad92b4593e4cacb39639d836403c4311dc8 +size 27497 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62f7dcce04cdca935bcef258a45cb6f4836ec22d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c0f18e36177e62c88f4e02a970ff6a05a2bf9de167339806eff9d22d732c3e +size 120277 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99ec975af5865b9fdeb29ce19c171a9be205f908 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654f19739bd31f5d2317c93464c999c8b1432c1a0679c4f7dec6694058b1a80e +size 57924 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77e575cf548853e63f4b9f9ecda04db12c9eedd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092031178054b8a0d030d5021d72e7ad8a19ab13f699c3ed3e3c88219b3f1613 +size 25423 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0e47e27c462747e535677db0c19e14317e776ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e3da8d24c14a329e1d2197093e39c8c77348ab7c2ca99f1b94a93bd5dafca1a +size 39394 diff --git a/eval-results/mmlu/0/ckpt_141/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_141/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..258f466d91262742afaf56f21535e9704d3deb4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b8d92f6f25219b634b7934d57ccd758b5cecc28921656612d0863894b08ab5b +size 32914 diff --git a/eval-results/mmlu/0/ckpt_141/results.json.tar.gz b/eval-results/mmlu/0/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3905167f408fc0bff269bfcb8c873c784d4ea25b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3c757b39570af824999b02555a714fabad956ac6de56750f246468766fe22c +size 7602 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecfab026a91099c49e0073ac54d77302f9ac7bbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add0c24c94159d86ea2b34f78e25c23d0a90b37725b461cb1e5c39c4911bef15 +size 17043 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..856bc8347c833989c3316f60af01c730a86d6330 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab4a7715983b70c8632c23a274c07afbbb43e31deae975668e4e6483b7e5a3f +size 29812 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57d26079d2165e9c18e45f4f02e5da84e33ce96c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be09baac9977749efbac509af5bc0ca2cf5e7059af858a4c0a4704d9047d8ee +size 39822 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3340959e4634036ae1f415188eb06e6625c925a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5b6836e831bd658614293d6950d4b49d80a716509ab47ca9ab824dbeda81a4 +size 26730 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..758bc95e7ae7a7127dc7f32e7221be1f7cd011d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d66a90a0131c4e95b3f87cf53b35830fe71bbab36646700f218c577b8c88b248 +size 61177 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50ae1610fffda1e2d881a38f5d1a75d218d85463 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e3a3ee3360eea6bee8ccefb03c9f5a061d038dba5d5ba7b7b2d5cebfece3ea7 +size 40362 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83d9fe679aece25c0466c7905ba3123df54b1eb4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad425b3201f4a64319f535ec67c87ac704e111ecb18cb56e90a6e2ac4ef1d2a +size 23745 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5256c32b026666da2063780fe7e18b677c7e2892 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf0a322e5d1cee847d527c10cbfefd8bf45b1c7257228856a30fa526a2db3d1 +size 31068 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57881caf938be7372e5058a285d3fda866b31756 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa34f5dcfae348aa517f24120578780640a1684f3c07a3302ab1ebce7bdb3e28 +size 22881 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67481687c05b9ecf5e63793f919feaa8c20a97d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325bf3f8c6e2d039b0fcec87ad03c0812f5ea55394abd663e0cb531446a11521 +size 60838 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..704d92701687c343dac28b47bba6b4a3779bcc1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:443798e3ab1aaa7fcd6ee7dc206a095e762c85007d1e225c2c8c751ef1fc7af9 +size 25666 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0a9806b5077a0a21b7395273f959d4affe4e38b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e3f683f908c902921a70ef1479d56aea4c614d9327a19c513e9f1c1f5d624ab +size 25754 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f876335a670466ddbbaaedbe09bd9ffcb918545c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b68b6f4d162db219ff88eee84360040b6e691f8955368a553974f3c81dbce18 +size 46425 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b14dc84c280be802e1db60fe0186f17ad238b8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f0a7bb549f76358289f798b4a2ebcf3973232398ab476b5e81fba1e50df21f +size 31443 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d20d05dc111bcb755a5bbef6d90663d055558c0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dec06ec74e0c90ee5c2b3a4b6613686733277d6ec0c19969024d4c5d2ace31d +size 28741 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..220970dcd6f291b43415b5801b3876566213caff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5efdbf7024a97b13ca47d600b9308db49dbdd49f9dce52ff10c16a6f08642e69 +size 74348 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e79f6420623ae369d118835279eed02375d8b9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f47e98fb62ce112e433b22893a0152b5d650ee29720a95f7deffc3b3746879f +size 30041 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..781fe624063ef7bfad8625dafb29ee65ab13dd2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:123793d2662ba89d6959e79e7784977e3b35aeaa69a154d600da6574dfdf6550 +size 19055 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d50c48250715516c5be5ce75d9b23c52891eb9d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2569f272525df42b91a63187f3dde9f8ae26e0be80845ee85d8133a8c59d83b +size 87935 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fa6d5270a9e16a569b20ebbd7e4526585ab4f5e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b75e2e3b2a8288491582c364ba09af5a3128e622e75a99a02e837cce78909be +size 49834 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b35294570b7330499e602ff2d72399b166c3b01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6554b8c22911ca61420c3b78d4f6e521ebc8be09647ab5be53ab7957bd45ad19 +size 31528 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e98e0503ff237a7f24b6f0de58fe093815604e65 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8003e4a8718fec1840cb18b9503a336eed4bb76dbf499520bc5af8d1af97e176 +size 145246 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e96588c3ca1a65238e520c6782a383aa324c527 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ffaf4ce4702ddf4d1e01b94dbf10afd9bdd2adc804907b1bfff934298c7c7aa +size 44522 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28b0c0320dcb8504a26c8f60e766a25fa8924611 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f19acbc90b4bd243aab367577bd98cb19815291432f40e2e9f81a29b732b01ff +size 54407 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2af20fbf8cdf9fa6bf4b98f9c9b7c12652a75c5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ee652792b47892dae5e6b9a9e583db5982efb1d861573fd2be1ded33c52e0a7 +size 92572 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c55048b8bb133a8fd6d88e610d2d4f892047b4d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f524111a6aa5fa588645acf53e3201641b2736eba41d85772e7e6172e1e376b +size 56929 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5001a847bdcd87aa6673f46486d7b2b073b4104a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690b1e7105290a3b4d9bfd38c7b11b3c6037a2b209f34b4545b9a686fddfe315 +size 58383 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d553c5dd4763cf3a72eaacdb7aebe2a473a77f86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56b43db6c2418bce90b3e6671aaae44c0ed8607d78d758c38712760917a1e589 +size 42813 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..337fdbae54ece3e7871e73bdda86525ac4f217f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2a1780525b4bb4e15d6cb2e8144620fcccb87120f514739a0fb25f64185a0c +size 143770 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04f94eb6dc346ad64c8fb554980b1b7b9f273768 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe016d938146d1a67d18d6bf5c4227f9bb66036645bfe58e11ceac7011c9f509 +size 71035 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df08f318c9eb6aff9db188bcc6723b788bb14e60 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c2e695e8ea38ea5c36f69f8cb11e97b47d4553cd30a2d7433d4dff2d6c973e2 +size 161912 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c19126d5a4be2c2db7505e223115e08dff17cf6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8f9624e191e10ea2e9133dfc62837df4947ecc0e8ebdf9b3d1e28345b34087 +size 209252 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7603a2f06105b5486979e0c2c7460b1a1af9049 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819f1b86c3b8c238d72429fd050bc59275833c6e96cca80673bf845f51ea7d59 +size 49847 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2509a68819fb6c73aee448acb7acdb13330a8d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b5b260ddac0e15dfa6dc71240489def043c7ac5aafcf6290502d32e33ba67f +size 31502 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d7949c836619b38f1a38675586eba629a868fd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c6e6e1baf6f9f562bb22b9591cde694e3624a8a42ec4f57551c2cb322b2d0d +size 36036 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee61f8b6ea924c25e7be83920f1f6af50dd2ef7e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b53f3df2ea77bcebcc04ef3290c6c5c46b18e238035ea48790bb2eb157702634 +size 29413 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1a274c4af9e7142d86bf9dd1a30d0ea3717afa2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21d3ad85fc75dc6f026fc4b99c8695e87529e5208917e4f219b770109a173c9 +size 40329 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcda7c23cc76e0fdd2930fb837c2a436a61f19a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd22836232e3981deb414c7d14138f249d3cf162b558cd6172602d5bbf40082c +size 26949 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9139ba46d35cc2f03782ec790c94bbf20bf4065 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f90e364889b6562c70080b0f1ed964d73230cb5bbe4eee44eea2eae332055d96 +size 21711 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79e030f6742619ad5804ebf13f6cfc51a85906de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f9f8dd389ff02dd23688a153972085c6ba8b1a3ee9756d69fe46418a926b184 +size 57371 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74cc1b3833910d56d03080576a571bc13d98b27e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e4981c5b9943ae21db94f0397ab7dfa2c9b825e7309c033a045d081fb950e41 +size 22231 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d62ed46d579f6ef04449ce2688ff27803335556f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3daebb950368e3a1a9cc9e414de8165d3ff292567883f23c1ec827e68111b194 +size 169156 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ac2957db5e60220ddab4beda59de3022d5570d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18f4edbbb43b56c1e485285d9d1c4540eaa2a9988bd34acce53738f43f41a1a9 +size 90119 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ba9e019742fb104cd364948abf6531ecdaf634b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab57bc727e6eaa9a752d8a133db52183a78ca34ecbe507d996d781ba449dd42 +size 153668 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..203064f51cd254160bc9a6468fb34a91885a3459 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:167e5593f0347e2ec33cfc846fdc4a10b91cda477037711b18a4661a65ab0b6b +size 79319 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6daa3528106920ef244327517b2461757d185e08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:551f4686d8311eda8464cacaf6336a05e5aaf819c33090eb754e757cb183e71c +size 74271 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..451896f4552fd27df52ad2ae7e9b4f5234f5c6c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242ae80f473bd2369d07f5ff35dc6392a65c7be0a4d35fc14c7482b89f66dbd4 +size 82521 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5abea010396994b791bc44c91d37426456ec08d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f77bb19f60bb4aaa8d843bd7a341edc177644251d5cf00bfb3ad8326d4bee63d +size 89761 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36f353be2e1e141beb9112897ea8ec6cdd0e9b56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dfcde2b40d6870daac9e4fa9f5157babe6f72810c089731bdfd28caff4e6d54 +size 1003224 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4deff3e481f9819fe27e871742027d81ca54b6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac29d2af8316c55a78eff3a406affa025b08098cdfcd6ff11a0b545d40330e2d +size 132798 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49ea4d8771d3817b87b646671adf4b41cb9810c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3be99c25067eb0b196492b4921df00ac3b24e5b4ce2f67b2b55c0b827cfe353 +size 182832 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8d892f6b79053a69e0c905bb9353219a090c5c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db1f1cb83d289268917e48ef1a74ccbf5d94da84005084f0964c73323a14ca92 +size 27529 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb5ba36bc17771db9e82ec7eecdec40b7205b8e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:922c62fbd04e20dc67782d59e8109cf1ef979341cb4c37f8be977899fb13429c +size 120224 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cd3a5773d3a58f00cc7ca099294f884908f124e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26eba95a28f64ef65891e9079d1f32e8b5d1447623ceca923c8901a08aa4d6aa +size 58008 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d78cf72bbc7e064117fd6be6cdee4c787927af6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776774512ddecb058bec9aa6f48916706c84ba34a12ec4ade3d8164d435e3a8c +size 25452 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ed32c417857bf6000d367618832676731c148ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d153d158080b5345a849120dbbba2291f56b8c160b19066c636a4ccceed0dded +size 39378 diff --git a/eval-results/mmlu/0/ckpt_144/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_144/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07139c6a5d882dcfb5de846de64892aad289823b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90fc6e9d7969283745b5873998a040cf8392cea9625fdaab589652890941f08 +size 32934 diff --git a/eval-results/mmlu/0/ckpt_144/results.json.tar.gz b/eval-results/mmlu/0/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a47ec012f6062562995cf0ff82a9fbf3a493a1b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb2234345df0ddf49960d1916924242b653dc7dba281e0c5cf288e35f6c8c5e +size 7609 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1612d97d3e1f95ca2c9c3a9a11a931167fd12a9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997cd2464ae3a774de3f917526412aad9a66746ed996d905912e6e24733bb3c7 +size 16994 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..058bbccf030a368956d2c19c05b93056da92abc5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7694e8d96fcc23ac86031a3d55397f354816481c216e47be6c1eac2d6926b06 +size 29793 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9deeb6bb8b7ffcc09039156e67dd25cc7ef2b9d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b48ccc96207061209c219c91246c7fb73bcfa3100eb84691bb6ca272bd6dcc +size 39783 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4422abc54590dd8bffc52d0c7fd103bf89a9ee8b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a98e7c81f3256c3ff11af376e44bf6c730a800df81a7bfd685ea70cef68347 +size 26727 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ba70e0a6ebb52ba75654ea24585e6eeb3a6bbfb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed99cbd25bb864b95d3095d75b1c8d9a0ed249236df2996c7fdb55a123167120 +size 61127 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..354f700429a4a7f7c56213641eb255e9e77ac17d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:112467d02637959b0f1f940cba91ac58bdfb1bdd3d4b8cc9f51d45920f9c88fc +size 40310 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44e34cd8b210a434ddfb977805efde44438bcf2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d043ab27fca0594d923b37157ae08e864d1cc0ae38fea4919f33a246b35485ae +size 23749 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33b47567000f33dd68279e4d869ea4b2f7ece671 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca0e6e9c9fb64fb4caf31b9e1ba033b4879232e30f72553471703aa637eb860 +size 31072 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc7b99505790b9f8d0daf6307cf5775fcd6883f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05844a9fa30ace19e91ba94ce73a2da1ae355afebe8b691b9cbb1b32034f740 +size 22866 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4cba07476b3d23187334717918e8d3fc19a93d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bcae5c34700d5b1f9211904889c37442d48f22df9855201330761a2a8593ded +size 60898 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9afdfe1ca705b52bea982f7a16da26141bc0358 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a5d1d96f9f1aff5bee55bcdbd236a1caaf6f2bd757b776788432b85c3507af +size 25678 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e30362a98037083b43f07e8b85a490342fe2fa2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9fdfcf2403306216fb739527cc98780b140a091b7ec82f998c820f7372e2ddf +size 25775 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d80d11c138b22bb583a4533b44b7fa633981d622 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f3087d729a3173e30eafe76cdabfa063f163fc93ec3ac2e08d76fcd9cc9ae3 +size 46365 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f07ab762858d1bfae08f4f3f65904b40b62dbea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c199282c8534c7c2e692d91b0bf2a67da758eb95606d19f7d75b6acec6435479 +size 31427 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fde0f102560cc898da1c8a6375d05c201e29d6f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5bbd8b9cd1f2a3829679a6f2b35c21a3d0aefe5a5421f9d3c7978fb2d7e503b +size 28738 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0857f9803999ce3d68c1a0189c12ce73b8626d7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49281125679b20ae40c08fad4d1f41c3d4b103d42b3415c4024a3f8119340c2a +size 74238 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a740e660d7719446f509f1ce40208f711829381 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a18798e6bb6dc2ea29e66ba9e0fd7f95c13a50952878352ce27457e39aa97f5 +size 30042 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..443693312efe3bee3ef3e92c250fd7e050c9f7d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:516b3cdb255ca023b64cebea802aa11fc988230e6129004ae5b02ca5c789f006 +size 19059 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7edd43dce25b29cb32c838bbda1026a13d7a5609 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3820f1f5d16cf0a83f6d5e23d2c45fbc8d4e28cc0462511ba307f0fbb8b10db7 +size 87872 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1108a048f84d9f5c9ac8d6cdce185164c6d18dcd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd42eb906198799499602222e938e70a200356b15b5d3a71b145f26c4eaee9da +size 49827 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1cdac8dcd75a1c28a03b7eb21b619dbc7e60a9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27b0a6d98c80b33f20afe6ee255a21a71cece08ad3907711b5910cd4e21470a +size 31551 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12195eaa97ad0f56edaa1c3529e1817b28281d0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b1bd17e26dd7ca3a2c9639ebf734e818d192daa72ac37ab884e18d41fbbf8b +size 145113 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d0b10471eb2753ea82d8c614504b1b0614e12de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0f41210431b6a1e376259fa397a65cde165bca8c5574654ab4e40d5d4379057 +size 44557 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8ca3099b0e153ba7031a418a7353b1cd60434f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a29b9b22e2f9799c5c1da40264479f4989e851d2fd84756c155bae45b03b87 +size 54326 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed21278c243298cd62f16886f7172bf5b119d271 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa34c679cc460e8d9e0428f5e9e0f7011f0103d7ab66245c86cde4d45a16f21 +size 92626 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e6d71a22d61388f50097dd124e63a6a2493fad4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a170ee47837906c798fddb95cba74611dadabe6dbba10ec0853d767c678c2944 +size 56931 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a525bbe376af86a51c715cbc33aae9766d2c339 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459ea9d14f9541e1786e03a308ffd4a487820e34363e9133b3a3a2819ec65dbd +size 58392 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c0dd6071af6309b9226d3e2ff55713c09737a75 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff86d621aae292b487ab65d3d82532e13c0f0c082447a0cc9ca5d422c0d5da08 +size 42817 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9809e5a7de52fa9d8d1522a4dc11fa76364e86e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de59244be21e2e9e0a51fa2f8ab16995d107348a82999613440ca97b06a9d392 +size 143684 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c73c1ebe8a4d06e3f74565b6398521cac27009dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3912fc8e356c0ffef3caa37599cb1c541a50b963fe063c2a64eebf184b8887bb +size 71066 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eae4c869f347f81098b330ad740ca385f8fb83a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c567c6d58fbb73fb4e32f550eaeb002c98840c462c778cf335042bb9cfc8426 +size 161763 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7872e6fa13c6ba47e4ff5d326cca2cb9f982278d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f31b9a951967f019ac806ccbc60989a43143ab2b2b375766b4b0cb0a9d531b +size 209158 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95db26d75beb0b1657221764c46224ec6a49ac2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67cd3286a8a212314524492fbffbb8119eef90b3f565f0d10e6e2ab0eecfec9d +size 49786 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..103f560102b8d98c5bcb619cd8395ce689bab4ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d3369d37e1d0db02ee979ab6702f3f0a314e335a91a3419f146778e265891b +size 31525 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2330630685a1d523bd5165435540c331a0938dfc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10782a1aefcccb833b56e55bc257cba637bdff43a49f92a6873de1f92b3dfb +size 35997 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81c6f1024ad388d7debed5bdaebfd3e734b35827 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2434295ddfeecfe635a4ffb71ed825e980971b42edd521ff591248909678604 +size 29445 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d806a6d2bd13377f4a3d43056381bf45bc2f98af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a9a2b702bceeecb633d1669d7b1df7908758b02d349cd22a82ac4fdf4ee4631 +size 40315 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b72441315b8b96bce92053818f81fe447f9bda5a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c685c0ba5eafa593fb709345c8ef9e1f4fd9b6a9c836e48e957b878573263bf1 +size 26938 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a025a7d38184eb861cdece686d23220a009b380 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5dac55d041a4e82a45b3eb4617af95af49623a382f98134b41dbab09e19364 +size 21688 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..589f3890b510963675f6cffb0abbf467c6e916ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c301158c78a9447bd45233e561f6681c3ffb7364240949522231b64c8d4565e +size 57392 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f4897fd8db68c0eae5453873c10cccf83d1bdb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab2eaf110f55dcbf82a248eced94b0ae20806cbdf6db7507799abc48bdd7dc7 +size 22209 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e34174fbfd5486e34b8faef0bbf3c27190c1f8c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f7ce0f34041b0163a6c612619492074af51eaddf158d45496ca170c27f722b3 +size 169115 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..015ead7cacf95347720ec4e698f4104e5237e9f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4942d523a06a4a73cd5392cb8ddc3e19e3f556f8c0d32e0b3c6d5d54343ecf34 +size 90027 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2917c3d7ed14c56a66b647d47bfb2335d1a2c90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287a7cc0ed4e98dcfae6ee3b6d759bc9f1f6745b1ee1a84b0f4fa6c78503b5ce +size 153277 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f7778ed7cf579f7109ffb7f1781c6edc746f802 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e914eacea508027b802b96e75efb39fbed90ce511670df4dbb66597b690a5cab +size 79311 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..879682d3e85f590b3ea607eaa66fe31ba15b94b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a23fb1711509d9dbb070725983c307b380d435ff4c2505474a3114e8ed1490b +size 74176 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74bcb62b280a759688854549eddff1fa06cd85e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3fcdf077ce8d430dbc440c4af58b8847170d3cc61d33c9ff9afdb775fe53908 +size 82450 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e271f7e6b83fcaef8322203d04d676dbbb48af67 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313fc24efb10f38f1c507173a45ebd1a5e7f40420f63e509f59726e6513979e9 +size 89793 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5130f921082e3d99d52ba5f6fd25a028abeb32b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85959bb229c7861c79240430c90de9017596dc5318ba9f1d38f37490a7ab1b04 +size 1003632 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c40dd23598221a67b2868ac9ca2af005943bec01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ffeaca7c1473c58f63a1b481877d4297b5671176a8f7f57bace86089b15837 +size 132721 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2e3f4becb0995124f21ab039fa0da777c51c74f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6d79101b70b001d3930512af021606f7595c205f0bf5f44ee4dea8b646d4d2 +size 182745 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbd67747a55cccb7b016b8fe835101e725d95ef1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9698f1cf6c34a5fc57f76fb47beb1bd8424d246734e1cdf4dee212e11ae2e965 +size 27507 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e328bcd0c066439b49bc2e516dd57b33baf29d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a69cea08eb10b007aa71640a92d8ec19ac2bb531dec3f8856a8862c88e0066e +size 120295 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19a05277b6eaa528d2d9f03fc69e25a95922320d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde79652c89cab5a9a9bc976f6d13c776e6a06cfdcab15e8be44a97c5ddfc156 +size 57916 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a11a22c61e90d606f0b3c99bb5a3b0d2b94d81a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcf75f5c997c4d85540d8de56f209062148ee0798c159a4ee5fd89870693a966 +size 25397 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14a7f5b8ae145df37a4020ae4c90c079fbe022a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b1b5ced97f7cce8eea99c7b2df5f6d399e0b5c2f411a4f9a309617e0200ab5 +size 39410 diff --git a/eval-results/mmlu/0/ckpt_147/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_147/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4696aa704933b43dfcb01f48761eb5906396693 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adbe5d32c30fdfb4893a60efa00c1730173a47ccae338e0a87b6995fd8992f7c +size 32898 diff --git a/eval-results/mmlu/0/ckpt_147/results.json.tar.gz b/eval-results/mmlu/0/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90227ed00079f28ff80e1b1668768086516e2586 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b52b0123b0230b5a815f7eefbf012e710a27b3e9b90afe7b2baf02ffb1b7e76 +size 7631 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a3116a7933c51dee21625e0d132c5fc2942f641 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2744561f73d28ecb9568dd4e735a38a95850b8a78d89fc07a052158ca7ac5d6a +size 16987 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..026440645e75b48cfa92eaaf7e4f13908fa18226 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be28915ac77d3560203309c001f21ac420bb2fa5edb7d66d1e63a035bbaf6700 +size 29844 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..549c3ce0bf83ef5e333ea9d3673b0875fb0b39b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a923bb5707270b019951bebd1cc9bbbf5ef65c9a73caf5ca901b89f1806e359a +size 39788 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cbddacfa0376bf7aca419a6802c9c44a3f43594 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c402588bd7b2d5e3b369f2db9e0dc10a9212822bc8920d6b75d894be415680e9 +size 26779 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c15d33f8accd6086ba843083732a91a52ac25949 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efb2cf41e38e402186496ad32ba752e8bd0e8b53b77a5156da9dbfaba6bf52e +size 61177 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..435f968022321f2e3e7407dd8598329edc57cde2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5909b54373a81e344758101dfff3ccef73aaa3f67e91739f8f983c8129fe5c43 +size 40347 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0abbc26331a74bbfb5077f03a3ce2a87f4ecdd4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b8f28c701a590be42604f677c31755025c121bf8371197c2eab8aa561cf4ee +size 23739 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..454a7115b136310fd6a2fbce1bde57c24a91c0db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00c391d400bdc706ae521b67e456bd8162f5999d6704b3365c5d9b88cb2803d3 +size 31097 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e440beacea615e2378b213c0c180b365cdb510a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f84bf55341c7040cd992f99f7d323bb2295c07415c4c68b54875e167fd1a223 +size 22890 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a4686b44a107dd247eb24122dc47bb57c1d36c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc703b59734429c7ac140bfd13a0ef98a410b9e37f67d6c3d7f85d601a8cbe65 +size 60875 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9e1df51620b3513e45376c437127909a0ba74e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1388e515a6f442ecf4e46e95ee51ad4588ccfb45a22c99d597735a5e8af55015 +size 25715 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2953e6977b6bb707377d137d2ff3724844a483cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e569ddafbc37fb0118e50e17abd2feb68ce653009d19fbc7be184d570711dbb +size 25772 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..832a2b00f87caa38a33c4f4fec06d4920e2e57e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd68cf9b09f8f7bacf2a853bb120587cb242b4aeff9275e3b4cf07057fd541e0 +size 46473 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..706d1b12f66f203896563a91396a6767fee5dfa6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:601db67adca1a76bbaec15baf9270561e38364a33b21b44c519913af3c0f92cf +size 31460 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5f37396bb980d9203675ea475de14e754b2e1dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d448a0591bd3c7417daa11a9a86b816333e5b6a9e75b213aec8287a19b81ce +size 28781 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e89088653d7319f3c1c28956f8a032c8b882cda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74aa9bbf0338a50ec96fd7275a3b56bcf28c31d2fa3a1f3b8e64511908ddf93b +size 74358 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8584b6ce216b6decc85bc63568cdb6325f98b327 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827f1cfc3cc519ab57a336b73de66660227013ef49e720c6fd01181445a1216e +size 30033 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ebb964ffca36457b8435004afd8d02951f999ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c5e64f298713cfd52ec2ddcf70f69e8c5053766eab5935fa05750ac225032e +size 19073 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e15a3155cfeaa828fd8df6c2fd775377768af69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:685b4bb1bcc62a82ad7cdb13a03cf1f9157e600d57b9b065b3f8fa4891e18378 +size 87871 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e6ccc8481f87378db1392978992771028660053 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe7325c04cb07f0aad17f17fa1a2c47a76af69958e2534131a250304e9ae97d +size 49925 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..605141eb71fa434ca5d8d0e4a966f9d4cc0a23e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8bee4c31d17a72aee6a66a584705d56fca07afb002bc5c4061b09a776b667e5 +size 31557 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3c16d3a5511b9e8003e833ccd764d0cfd3963f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e216429348fa3d125467feeb068fe924ada21a3fb3633a015ecd84d74d22cd6 +size 145420 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3574ff6de7cea780111dc6df4f706971170e1468 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c73da6f5b1d347150821bf6fe9cb5eb00e7c28ea29848d17756205e837ea303 +size 44549 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8311bfbc0731cd34c6d50eba764909203b4fea0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b92cc341a2fc2f61a77059bce8e784d6cf6d1510573da76af27b61d7151513 +size 54388 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3acc6a4fdaa267cddd78ae1f180aa1ca236c5a41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04cc8fab68f8176ef1b9016c21bbe6393271282cec6959926a63eb5b03c99c17 +size 92605 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c9f473a81e5fa8a89c64fa4b9d46da03b08d965 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97737b48260a736b4d17e45070f24a234d7545d57048aa36ca09b4a797ac63ce +size 57001 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8cb7dd135922a1edef6ee559e85c6ca445d26d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08ca4d5bf96805bfb10f3432a071452817fca64b788ec76d691fc1aab147fa08 +size 58362 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32abebaaf3bc3031f39d1201de7740a35371d9a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c2482cc75427569c68aa57149d5a30bfe3e732043495b93d927cf70d495cd6 +size 42793 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49ea3a8bb3db96789a4023c841395d1a3b55e096 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed300d91b5fd2a1552f74a956851980ed42d55e22772babaf0f23647106b855 +size 143774 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fb749d69bc0d5337e46553ad87355daae06e8a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f62665cfdbf6680f75b63e65003dc29a0594e4579b2e214c1738fa4b35a4a0 +size 71054 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5ffa77188faf155afabea64c13fa59fdaf82aee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae34d18be597bd67f45832eac348ff626fad370d32bf1ce7e8c8e6d075d705a6 +size 161977 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9af01e60ab78ad21ed430a784f67731843cf3b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30351474c97f42e839d890c4ef26240d32e3add7bf6b82d0e128517d01d6afbf +size 209369 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7db9f9e4f25fb1dc9874289d9ad2ea0a6c698e27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90ddc66063c62f0cbef5d88f8f8001d169f401da9467955ae7ca0c814e3ce35 +size 49825 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2fc230e9b7df85eb5dea86a67d238e5faa7ab43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:903afe464a3239be8d5fada6142e5a53fd501f6ee12302ade96efd9fef49fc7e +size 31492 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa3b00f4bfac8a29ef8b22386d5e80e25f80ac6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:913cb756e468ad6897e7bec2b8d898af2be633a2f510781573f1f8452c154151 +size 36046 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e991f1e6d36fc6f361c7802f47ee2bb4cfc8b4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5677e24706b6cc2d2b619f074fbeb0fbc1230c1b9fca4ec00895b143606cef01 +size 29490 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11b2a5ecf5b17060f6b31f7c7dbf4efd492be0d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cdb24080369fcdf45305e0558f0283587af7eeace913db0220343cecb1a175d +size 40297 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40af004dabe73307132dbfb52226a6e90406978f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b6f3dce51b0f7b04bca368a8063b4af8edd9f3ece8f9cf6e55d8b3bf1e0531 +size 26944 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4d6c776d71173bf1b9f26bc579ef9b824a2121e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a550424e8cfebfd834056c7b6c00e5826a529edc4041bfdf48fbb74326cf2075 +size 21684 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15697ea4dec599b4c8c94e7a8abe584d10ffc31f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff83d284994643ab0c34aada2adbb8c2533838f7746d47655ba724132aceccf +size 57373 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d7c2b5698dbe1a2d8cc148761426f6a67cb71ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793dced7d52a8233735e4ecba2cdaaeedf55c50a9c3fe3bbfd38ec50ce2870ad +size 22267 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47e7b3bb0f1c395b512eab8054d1d63eb23f288c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4460c38b04b4e31ec37e5d6806142a612bebea634795aaed2a554913d7ab1d8 +size 169065 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f968d9dce5aab021ebaef48aba37f8e0a802a748 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef21401523004abd8bccdf7a7fc7aed794c31d26ea6d826b3e0237d471c4e03c +size 90106 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc5888356e956278aa290abf02a2e341da4ce0d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72eee55306dba9c569dc35b90e648c17cefe927e0f71c46f2e7a2396f0c01348 +size 153793 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5079e662d51408cb87bd6fbc33b3b8313214dde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b22d1cf5fe6a24d95dcba7d2f2161e128482c47f2c7a980c4c71ac913eaf00 +size 79311 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9e56a14aaefc13bc7deebc529e666ece6dc2078 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8932366c95255ea278ea6939d2f841f11527902071ff7ed968eed0384cc3234d +size 74202 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bbb966901e5064390665112e921aed0e463ac4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0db666b7bdd4ffa6824500ce516f1556b051d3645ff711f8e71ce7b08770353 +size 82508 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14969b36c8e93a6a10f77a0e93f37e0db7655131 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5169de2332af125d13bdd0752ff3dada675dee354d699f8df7e903ccd7b737a3 +size 89807 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d7f65582b18cfbbed26a229e911b274caa2e9a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149ac221156be48dc7c448781aa0c8edc2469e7a6403a6c3b9e7149de9c48aa4 +size 1004496 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e653f00fbd67e44b5a44ceaaac00cc64823608df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6439a3da0fa22bb01b47a291d95460044f259f2704c7e07198a2877c3a365279 +size 132909 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13e34b52b8980c7b7bf37f3e7af8993d8f2e809e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5855c0946400542cf7491ba5a3b404dcb022ef806e7ee416a1f263df9dde331 +size 182861 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..728f45ecb401659715d961a5305ed79f4edf98bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8223b48374ce35ae8990fc1868364105b3d12bcd3c38f7dcf581026205caa556 +size 27521 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cd5ae64b977ce9181f34a8f6b28a92162371f43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbbd1db7c9c15223424a1fa4b891852e8007e897bd057886719e523178982467 +size 120305 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f60d6f9e3efaca62ed7e877168fadd1a5a727db5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f3be107a64da1ffa89d13696ab522dc7d6a6a7d6c3b9375ae4ccb3625ad460e +size 57982 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb37681c4e93366a7d68085156e5d9714c28c114 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f932bdde0be69a6f164cdb48d3fa0f9922ae27cb3c52aea1007a5089fedcad27 +size 25419 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..647f3b6c38fd040e9e52f5c19a72f98f89cea14c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b72535f44c0741f9a18d846df429651eac691795e2da09735ba4ef78bad14798 +size 39404 diff --git a/eval-results/mmlu/0/ckpt_150/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_150/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b70eb6378e924f73401472a4420f3f71f78cb94f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7caddca06f6927fe75e9922f8e9799d8fa3436c8d655e1d9fae247296fa837b2 +size 32930 diff --git a/eval-results/mmlu/0/ckpt_150/results.json.tar.gz b/eval-results/mmlu/0/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28f3b2ca27463b10a5433c7b2e4647d4e7a546bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c754a02eaafd1a3364ce07674c86497632deb4626642f008d28ca66f1ee1eaab +size 7629 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dca4a8a66db88612f3fa8acd3069c288e972a88b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac3d8a60c00c33bdaacc54b7d79772c7475379dc952ffe519d282ffc9c822bc +size 17008 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f30f7ceff13d48c869fc48072c99e0c2b57bba7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519aa985dcefff39e8a62288e86a4d3aa4cf4d296e338de62868019627a3a858 +size 29771 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41abac9c03f5ffdd4d72d88862f8897f0c504036 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98bd47427a795f5d5c027b16b024e442e1d39d3671f5f400826d383e6cc7e6e +size 39835 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe31e3acf8cb256ab489293164cfe0665500e99b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e959ebc66edf25387133b36f2263f2ad5c1e324e1b945648f56d8e02b1f8c71 +size 26722 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2d49735796d81cd81bfe9071bfa0f938d5575ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de384e48d0225492792b7788109a878d9a2a2594462709798b6a5f4a300ae467 +size 61142 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c92d9299138c276d2828b7927d0295dfd7008db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972011e9fb8ba6c5135ef65aa80d840ec62ccfc67abeea1cd5b55d19ecf2125a +size 40342 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9760174a792284c1c5193575aac79b52d6663620 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6d8f3d0e97685acb74f3804508829b781b4e331371de86c9d6a0edf3ae5f793 +size 23720 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31ce895c818449802ece31e24176f53f1efa34d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b538ee158fdcbdf5ec491c1781366841fae96dc4148935c87528591e5a32fb9a +size 31074 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b4a9246eb55102040e5c0cb56c65298238aaf31 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e9bf544cbe108d089de9b3efed9d7e6db376cec3bc1e43e1daf75305cccfff +size 22908 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76196a33465d1ab3448c0a198e138ea7a22762ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f584e8353c122dd7a67939b70d7ea0b8295ed50fd3cf8026d0e26665333ac03b +size 60823 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01581ec1aed80294305f48dc59b842570dae30be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b4bd93bed48c1bcef40ec92140ff36073dbb09da884ea3fbdca666bfd0a69d +size 25657 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6a6d353370cef1341d8aed287952e0c88c11f1c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c429f8e9b368c52d6909e2439fb5c67451e1028364054f19398f50cc803627 +size 25796 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e9344f3074ac0d9a35a39c36600564c8cd0e7d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eec77f7584e7d4994b5fa2d0e57162539b8d1eb75fb03dbd6b944ae9bffe8ca0 +size 46425 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12780d5f7efc2758142cfbed40aee173e0794958 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6159904be31597dc10e9666d87f159fa05e0ef62927c2a2ba201da930843eb30 +size 31458 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9ecb167a058993b24453f2fcc1be4250fe87e76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27019c7ab4295990005aeb88660001730c82a8623b7db65a87640e54389a3869 +size 28739 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e58a63edf3b528a1035682bcb66adffcaf7ea359 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47614b505ae88ac58ebb4124903d5167a349d3406fcd5373a4f379925d427bc +size 74365 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..140a18f56cb894f789026d85953e98af2d06c0f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b3c1d3fb62a65ae915d2ee9be17e25b4627ce691a6871ff9304febd4f86f53 +size 29994 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69a7d6c6ebd5c6fd87e21ad4960b7de256a946a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a685d9214613df2f288dd4c116fe559aa1c1f94bf3b15692f96b49cb1a6b3b +size 19027 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e68798ad59262f075847c7e73c4a9717b8cc5f41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115da3d3feecaf365b96cbb6850c352acbb01a2a0bc408acde27a8e5e9b5631c +size 87870 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee47571f535ef37404eb6b2a0642f44316c33dff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75c768e206d0f94fff4bbf863dbca2d5f0b90380e13f8d4b31110a90e412d9c +size 49816 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68f70f8de9923d976dc67ff75f98cb7d08e2b187 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42eb81058e197cb11ee1177d11f7e868a892d48a9b4967a75581850b44ec71c6 +size 31573 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8dfbd86581466d8402c8c9f16f943c75cc752dcd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bde8fe9fe22a043bb30f98ddac779f9bb8036a030c491e342a42e82b04b758e7 +size 145200 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd8abf4e33c41800bece28f7cc01a938833c8780 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451d836d6b9f45a3812d79bd5fef01fb6fb7a079571bb1b0805d122832f8a2fd +size 44453 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..572a505fdd13b8a21e8a915a5d7c86e0457fbb9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6824f9f4f7a838f2557bc22a33d6f1165bfbce359a4f77d205bbdfa22e1798a5 +size 54379 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..566aad4013f8d54b883d04d24babe0f1f0e8840f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68130c5c2a5ddef67448659eff8430a0b4dda4371e32b1318a6da55adebdd1ea +size 92561 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7842562438041c597dce17547bc7bfecd14615df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6434f3119bcec006a292f7a1b920555a3d27688fda7c695f27d8d54ee97108b6 +size 56964 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d95fb58f16e05b2e960f1d1c28b2da871f092aea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9b7787b7f4c80720f923928ecb5ab6091547eecd5a43c5eefb0a2cd79ff0bc +size 58355 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a91349dbeb053ee366d21c270e9552a8d8deae5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b847d7af2ff91d1a11169abfe3524b30914987a717d9bfb24a02aa2fab7d511 +size 42752 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5c80279b2983773653785928bae094f3c11610e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e976956ce4b3dea55f4f6ee84e684c25216fee060451facff96e3e077fa49e95 +size 143731 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb16cbef0d528e204753ee1b9ef368d92794a75a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba51c7fdc43c77105522c2b8e19156adade18d70ac5f30f7a71a5dbe55c8abdc +size 71010 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2eaf3f3fbaab06aff93c636afa8831d6527d095b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beeb8abeee753833e495ffdbbb792043d1690697d4dc4be82c00b6a8d15fecb8 +size 161803 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07e207620c420169ca9bf80cb42dbd1198e4cf35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3f46c91c1ba802916905a4f89248a6198caab9df660609d9ce7cdfa23fa494 +size 209226 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3286fd818513badbef2186c00f671ee80132965c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56529607bb0005e0e92148f66e93e9e4b8adec37e061633974a855067fe97b70 +size 49793 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15c3109aeada0f016710bf5111c5138ff997fb95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003a9e3dace0a3f2b34de6cf5246d2dc3f7482b942ad860a3d9752a2c21517ff +size 31475 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78149947e90adecd8d77bb83a6e6f9453487814f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaf7bf0d691287eebd585e4d5e8445dad4df0cb77400fafcc56b102e939ef00b +size 35982 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00aee1588085a2ce9fa92c375858acdc676e7895 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6308bad0735f88fa97e4a8776576332606eaaa7d84c93fd1f90f1e3433d69c97 +size 29461 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b14dedd4f92890697d75c709431609be22a2353 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49fe61f45c07e556afc90aec031e5c5dad008f89f135b982fbe7db83fd08372e +size 40304 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f30f4414d6854c7d9927f7e2bd65d983483ba42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dabd41cd3daf8e3bc35e12d39bbe14cbc5e4e909d473fdcf26cbf5ebb76583cb +size 26956 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ac23392a415d7c409f549d6352596310c34b530 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452d37521ba814adef040fa4ba19dd212c6be00268b2cffb5a7603a16347c833 +size 21703 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..294c4a9a13df096fc828c31ea4c78f939db78f17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54339031feadfe06f77506ce784d5e69d6b13f309a93cd66d5c29607e8b2abe7 +size 57357 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..def5f5466f2f73cdd9f1e176145a175f4e9b2e40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d69a7834e153e3b234e19c9acaa8950d8d969441e671038323f68c073f3036 +size 22225 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b0949c23e08ec6706c5af58a5f0dcc3db532b10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3830bc17aa99a01388e50ac1dcae1a82c934a26008cb890689f7909ca4906920 +size 169011 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0f5179362bce5ecff7e41ea09cb47202fb6dbb9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ebc617a4167f91883726b78a73e0d7ffc69e892433c15857b2097d466cd1a1 +size 90096 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..552e6f15769eaf3470d8ce527bcae711fe0ffc8a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf046ae32ce256839ffc60be676225829b96117b0b77ce4ae0824c8965c4dd22 +size 153206 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0737e81e707d3e75943ef08f1efa60323301fd2f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08e873bb7796122c3873b505022e57b034777f4039a6bfce8de294261a8136a +size 79280 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5128144dc422512bb8f1652e4c2d6c1a12670c12 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b6f911904736c914d50be35c793fba478b2f3871d6e80e4c1b9368de4bf890 +size 74216 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e213a824437016d128f428882bc43c4e5af57489 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef7afd79dec6a5b6ca4421f3463713fb430b106edd0f7c489b594f357d8c73c +size 82449 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42c14d475c712c6f5fc99c849c22de2635718b09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1af62ec585bd96bfda8dd30ee75d992eef7ac247527a232034abd1b30d61065b +size 89791 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a08efbe3904a88da6d25dbb2c615490de2da541d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33142ea182a6fd064a18507133bddd923e57cbc82bab69e0d1c5b5be4efe06ad +size 1003409 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21328689eae7a96347814e1096d532be6727d34e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e70bc04dbb02995d8cf6cc65c123454abdbfc7847c2448ea0ac24e8d421aed +size 132854 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38d8d7d8d7e6c26a3c7d1aaf8e6a61a432de89c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ef2561f589b6848efd1d9d816e8d6de51107b4345f267592477271c7271221 +size 182758 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f04fe08cbe919ce5d8a4dc19752b8b8cb59d737 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3930feb26562360fc9c3169c64ab4c58482e159a26a06e1cb5911fa4154e755 +size 27496 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f67189001b3b09eed7b5ef158f2f9f2dd6991fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e1a6f4b47a12feb25e717627d4278e6bfff1ef31eb24401fa82cffaedf47135 +size 120238 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edafcd39ecfa7b02465976adcefeaf34e28c8d6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa7c5101223c058481d429c8f8e3eb9c0149dc7faeaabe9e4864b794b2d61ac1 +size 57896 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..509692048f2c4c2bc88fa09eb161cb0f34448832 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf55ea3b03c91e445962c42b1a0661c672f00e50294a08a7af7c60edb1ad46fa +size 25415 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb949e76b8fd93f1bd5b1da47812120576426e81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c296c70de469d90aecc0a08cbc5607c559d187a3e6d5b4510892186461956742 +size 39408 diff --git a/eval-results/mmlu/0/ckpt_153/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_153/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8be76149652de35ff8db00f12db935dc1ca6c00e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88619e67d1525bf7272123f24f3dcc59d4119bca520464045a05fda2ca81bcda +size 32916 diff --git a/eval-results/mmlu/0/ckpt_153/results.json.tar.gz b/eval-results/mmlu/0/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24a1e517b7083a26dfc423be1b864512e2b351bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f2d814671a48b15c4e696c6fd1906497d4fc428576eae1c89181cd031e1f72c +size 7612 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76d6e0f4a173a93d57d9d11c4e3c1c22fb2d763f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616bc2e55484a7b4b0c55748e1dc5d8edb27b210e61e1ae344416a20ced35c83 +size 17008 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcf4ee60cde694fdfee32ba67774228f053bf764 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:978ef1635f678b3b400108c5bda954c8844de08bf288eac715bbd509ed17984f +size 29792 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7cb530053c5828a872806b135513a94e4610ebc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:578fcf4f6f177b8dd30569dbdcd5b7e279b6ee89f9f3dcfb55a569dfe7ab07d9 +size 39834 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87a64fc37c3f7264292957208f18639d860d4d2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d7d5b3b1272d5b232932c6a4d21dfe171785a9e8128ddfd77cfd11ba74d9ae3 +size 26736 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85464bd0a5260f35e1ca74590b3183745384f603 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82a1b70bf6ce1d67d4c2b0ba500446af0027a0001dec220b68d10a4fb42cbd41 +size 61229 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc96375f446f009f08050f605ab6214c8013324b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1764686acb3c5920f6c3053835fbda57302cdaadb7208fbf9009932cc5c15663 +size 40364 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b5b8cfa8d523cede9c4d56047b348c013fb2c13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4008b63e775d93a73aace1e566848d4aa63fecbc4967e1ce6d6def4bf7653d03 +size 23759 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48b1735de207ead87cafec01bd34f94a26547341 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28ad8247e250a304e4d4f59069d287d0b55d887613b02f83df94e05fac23c20 +size 31063 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a871cdce5bd77ae7efcf6aaa31f820e56d646bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905d7aa1e51ebf7612d3a44dac78b9d8c03ff07bcea3c45e2fd5787d3543c50f +size 22915 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53d7288bc686db88de5cf65067159006aaa4d412 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5533371165974c6855e0dde8b2a513f6249630c3e3bfba1d90417a4762dec8bc +size 60876 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75d10c1adc0b382281e33a047fcafa09906ce5c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d75a9fc0cd47cce4f75525f9b9d8202a30cd992700543b3dbd41866e6c716a51 +size 25669 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb80acb832fe002ca756de797a1cf70a1848c549 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a165bf2b63ddb7a596cb228147ce92112e30870cc77a3cc31315e04916c2d18b +size 25774 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..deb1ad1b8c4359dc60de21b825114315fc66dada --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11bf59bb46bdc8426ebcf9b38032040852b25ee0657b88343d75ff7e5022b8b +size 46381 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db8500dde600d52f148a721fd47f5421fd537cdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb36140a2f208923e37e4751028caa996d03cd3225f573f3293ce08f544c8c6 +size 31412 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59a40a0ee36251b8db84bbaf9a9aba6a9acefca3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:158e465f0ca49dd719ca50f53a3eb35e027bb1930ed86d5b47740a5c4c60582a +size 28735 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ee7856a2daa290edfdb503aa914a8d4f3fb4e91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cd58f2eea4cc0076b5fcb1dad969ffe68b4222698bf2363d2eaf58497d7d7c +size 74315 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ebb8908652ec0981469cbb3d2f40ca6e1a98efe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757e4c879fb30985161d497ae0201c649b41cdd908c9fdbdaaf3519275fff2aa +size 30068 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c201de7bfc4cb594aab236ad488a26d039754b42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5294bde37e947346ae6e0069d46d3bf6cab90354810089dce4da68fb596e89da +size 19102 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d3c15ff57f2392ead26f987321bf8f48402ed71 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89485016664d4bf25ed388af725c5b7aa079095c96fadf9a840dd5b47a0b60b4 +size 87920 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1f35bd4ca087a9586a985fc3f2df8b5201956d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:962ce8f6f184e1dbb7a58c3864015cf2d44d1a6599c7f88c9b375e46132b004b +size 49875 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37f387fd04a4f752adac34808a3827f260624bc0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0837d9185cf5e1d8c2655bf4abb76bea28d6231fd48eb4d757763ef09a6f65 +size 31595 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99cd5688c312626081e9320647eca89397b8f585 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409dc1f9feb68b21685e42867faf838b445a90f9712e3c04cec16aa73bd0c94 +size 145321 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6bfe1a3191581de673978fb1c2ae79d815d17d2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:340ee61a4aa4d22afe979bee03b6d786b0721a147d08dc9a016853cd111f5997 +size 44521 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e2662d5add540f20f305e78946f289637b883fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc2ff19c7ab5178593c7531d71f4f7a3dd6c6e8bc9fa5aad7600b40f6364e16 +size 54428 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..873bbffa0a8b71c50551414ac8f35a670e7fbfcd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d043c1f49b4c71e0eda970acfdaee561cead308d6bfb4a81611edef6e0f93161 +size 92580 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6abbec2f947e0854d934f55a9214b9ef5539c90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da3e6534c68e13594401e00970a9105041e9aea999489a860ec75c044fcc299 +size 56965 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58f8dfaf41af2578421fb72c4d69f0b944844644 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cb54a70b7e9b4cfdf806c50b4bdd433be560c6b3a06f5eab6b22089b64d1cd9 +size 58380 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee3bbfd2d2ea922e06a4bb218f3d65819cf72762 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d412585fbfefcb6b5c24eb0f1a11e7572a4249f591b897a28367f6c6fbdca4e +size 42832 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93e7a8aae90894cd52b4099dcae697ec24060c57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a9afafdbedeaa201cd8e8c9544d26dea7be9a29d7b653cb56a11098906e4593 +size 143858 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ca6afdaee43c0a83c1aafc20ad1d8dae1a0b6a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d239fc1c081ffc304036370f7464cc87b9635049da183e9b9b175db0ba2c6050 +size 71063 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b07aba825844bfa2f396b2ffcdbc596b9a1e3850 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb9cf39218fb19ecdcdf6386774e7160f5c05626e933fab32a8c46509f92ecd +size 161934 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7385d6cdf19ec6b59f65c804bc6917c3a50e22e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce099e9f6bfeee6537d029319b5c190563ce990b8d604c466b0b6f060989d53a +size 209389 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12af11daaf643855fb64c6c63d855cae18deb770 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150b971f79ae04ef7d31b0b60ab2911158739c83725939f8a878c4086b897471 +size 49852 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..185209f8572957165ffb560e922c2d45612cd3c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a162112b566126f32a031448b1702b3ec8d5c0984ac3b969af84f39bcda2d322 +size 31484 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0040631054f1fc8d27dd51afc7f72f709b7e27e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24edb554a341f1c89c0ed22e776694bf59618f1e814249a4a3b1626a0ad80bb4 +size 36010 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..925a97a321941b91cb391d4522fa22e33be75a77 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd4dba09b35fdb34da89d0175c0e945b44b57a5bbb39fef0de001975f46d79a +size 29469 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c27742754ecc248814ede8537dfd0509478570be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c69c46cde277ed1dbd0ec744f61be66e73fad1f4a6c387605075567182762931 +size 40286 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..910659d75bbf3e91a77cd62842c9787642e29443 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3abbc294e31b2e453660563d2cf03696b982c34430044da10b9cf40249b85064 +size 27007 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5919cee4c96e3d36d753bae76df98d3f828b4f20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa80a72363b6c7120cf318e3d3149657d496209c4dcc6db4e353051fa1920be3 +size 21685 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db052892333df5260710186507ae1335bc484fa4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce8fe0f11fd0ade05145958308eb044ee65b36ab87fc4963d587874b7f71beb3 +size 57383 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6d9e2573990ab3dbb648e4cc42c78e2902103ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60bcc44b2645d91b1961e5f778543c739d944c1e5d4e13160eb5ccb7f29bccb0 +size 22248 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce395e60153a0dae4923722dcfecf4a284f6c3b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b93ce42d330e61c5f8db50fa70e0b85b2890f5e0d4494c2b1d36471448bea0 +size 169119 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..254f783e173876b6aaf2ce20de0b0d11a2bb1901 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b0171603a19280a0073d5c8dbb2bc9e964d80cfc107aeba07ff862689915a4 +size 90143 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab06290161e4d54fc0f109a6b0fb61ed854977f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:709ad94d85c62f27ab88329c65870a86bdba133257d1b6c4e86fe8646576cab1 +size 153747 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdd86cdc6bdb29ad7341764d6cc775cfb090f3f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5038684855cf21bfbe6b0fc4d4a65171e48549172fa2258aff3a05595055be3f +size 79366 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7813bff9bc367a1b52a157e902cf08b72da3458 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250c81e966d55914e3f03c5f26c3ae597ffdfe1c7ec07779cdb99a05d1b02fdc +size 74292 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c100b18a77c3c571901976966ea8f6efa8963df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2725aed888f6403af6113cf41fff1dbe4a9f8dfd536b781243df5169d0f7e1c0 +size 82516 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..939cb8c4bc4816f3ea41d354ea4b870bc950c9c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d2b83311685081a34f9511312115eb0e92f34c0fca74944427fbac09e55446 +size 89785 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ee6a127754bd5c8cea1c2fc8f7ec8ced31ce4d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65c611552f79f79f63815ee2bc038ea16267e2cec3f1be14350b01eff4b2edf7 +size 1004439 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4605c2339dbde69b72fce11560cb8771bc06cca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d81bf104cef1ebeea3dadbc2ba9644f1985657cc511623e0eeac9d9936f2c75 +size 132824 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48d3f5894279c4c0a99146688d36b5b1154ded15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb1f954fd3ff920304cde64e7b5d6f14b37ed050f8613fcc8efbd666f36fba2 +size 182893 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0aa25fde4ac2bfe2c88fc31819a45fb25c58fb32 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08cb7375a889532e202f1bca19e1f2b07627036c13ed4dbd99992aeba8c24537 +size 27535 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c729de45fe93e881a094d566e9db041df0252498 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f481b001b1f621229de4c7b220491115f34dff8164bfee24babd876c5282116 +size 120326 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0100e8c2b20e6461fdbf01db88a8e7b328b59195 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f4dd67f12358b1608a316febaf8c67c7979cbecd4e1af37859786b734d07257 +size 58014 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da2e7aed4fc98fcdcadd3a94c4689b9ce43aa542 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33dff57074334c1fea4c6051ab2e0c61b66764e91af338f8146f734e6c1504cd +size 25420 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32462111d4065121523ef99612f51ef89127630e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abde6a66ec927397182ba879457713a3374ed579631b6ce6366bc19a49b92859 +size 39444 diff --git a/eval-results/mmlu/0/ckpt_156/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_156/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42c3c67ff9fda76bb6f35ed1ccfde480bd5c0e16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc8ea44ad7e8291e08ce8815581748dad7e136a88eec5d613c8da6ac311b477 +size 32949 diff --git a/eval-results/mmlu/0/ckpt_156/results.json.tar.gz b/eval-results/mmlu/0/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79e05cfb495ae4e9aa2e6c8c9118cbb3852c51da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1c90c2abe168201cbb0eff95b40d26adb446b96714822e92ba8d3b23ab9a52 +size 7620 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bed20cb9188ab6b858add3b0dbb48c0af51ca7b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17e88f2eb938e0768ef52874408ded8e43aa47e881830bd492d0e855f00f8bd2 +size 16969 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bc798d46afa15f7ba2b31a5ae26393d216c36d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c82afa4b9f79ff282cb93fe883dd25b2197a1737a697c1731961d4b5474dd0 +size 29727 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df1f01bd2e3980d40396e0cadba9ada7ddd65cb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8facb3eeed26033fb9527d98b99f496f47f9ffe40a95751adada8bd669b27882 +size 39770 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80a35652c001ba4a7c940ac6c59196fdd56235b2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b89d5108fd9530c709bf4a7453f7b945f919fb52f3129a46f3ff289c15c293ae +size 26682 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c9627f678543650014a83a5e769960a423d7064 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73184f53751f38b211adabca8ca67ac4b19d87e0c020d8230e8b0cd8ac11759a +size 61002 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6593df98420cc90d942e6e2269bd2f9b1426b587 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3ce222da84301e24a6a88bbdf1c87eacf905d9a22afe97194c69a890378b57 +size 40285 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d32bb145389b08638e518ae5b9d2714e6522e62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032bb928b21208fc418df0b11db998cdd5bf289a8fdbaacdebef4e727f8fc212 +size 23701 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f10b2b7ce2375af128f05aebf41b9ec4150370ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af26de5046cfbb982fe6938f54007164d373b337f486e0d216b0e153cf447662 +size 31040 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92bf8e1e07216a95dc634e7ffaa5b6541280feba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d908e8297d7e22514b38777b8013954a0ee6a738d50e57037e8ca1be487869e +size 22871 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d133a6c87b7c7c2a54be91bc73ff76c1dad296f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f175938789b8496a03a957a06d3d87e0e91240920d8557a4178d0b07a16e1227 +size 60788 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af3ad72128f8427014c6c4de2d6a44f4bae033ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74287244f5ec86bc8e9e4b7dd00fbc432e36869f1050ae45921978a8bd28b305 +size 25669 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0f6e4dc85c7e3a8b0840d480ad87dc2c3edc2ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519cc9b75975aaefdc03e6e7a429d81d46bfcf3e729a49a83433543d4f77dc76 +size 25723 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..020087c4d0519974300dbe77d9c346357302ff89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2be5b438688f6b4ab826abd0138dee27798d571df392fa470f59d732a4e52e +size 46379 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63254dac347e19d9d7e9c5eaec4e5fbfb5601b04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c690a49e7b5772f4bf031cb7a648f053a853b65c21132d93d54dbc9c1567ce5e +size 31386 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..066e41aab97fde5c0c11b6b3732eeccbd8df4fca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e84c8bf912c9f17629f76ce08104973f6a1b150a4e6e3927b779381a381a9c9b +size 28675 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24b8a33a332e44bf43889545de9bfc606fe0dc47 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c08f8b9923765801c4ee3eb33610b945f46fb8c9f45e72c97edb791f810ab7 +size 74292 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f7ce3883da141c1465177d2b62016cfe9f66e48 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e488c7451b0e74754bc43b5a2ea51e6218eefbd88ee967a9a9f9899e661bca +size 30058 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbcf90570ef4dc60ede9469ba0760424562887df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:225b92c63f61b862df6346b4e1773a22b347e31794baa0600932d14e5dbfd6f6 +size 19022 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0dba0014a8d9033dda65fa6640abf381ce5812cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e0b4d03c850617d8ef9f09e8bf2521e23bfce311d7c579ba3fd88cdae82943c +size 87839 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6bbc525362b1707b6628699c2cb6a030e85c5bf8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6b88e8b582bd1ecad5dfe390865da44c6491c6cbc4e55defcc4ea845c1d8852 +size 49778 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74f197c838b6fbd7111dc6b4141ad38d886d4a37 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ef6818fce02979647af15646bee3aff2e99ac59cf4ab5f59cd334b3aa68aa8c +size 31536 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c4b7c80eedeb820f53c417d6d51c8ef4edc5223 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa498fd1e8b7a9a143e1f2fe97c087916e38574ee3234d20f9fd4e4de2118d8f +size 145297 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d68d1715f1b7d925f0e371e0f159a99c3a9b7b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203ed6361fd2143d6054271f5460559d62290215de8e89751e6906808a23857b +size 44468 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3d89889bd8b2febef934398aa46b484c0da11d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e893727f84f9a2b85ec06d5278ff60217158a2247075dcf34fd0ff46050d54d +size 54320 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65560ca11a5c7fac4cf7a5591f8ef393db4fde1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:859a78d7f2358da6d9a8aa15d76360946fe6910996167647ead31168501c554f +size 92479 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..076e7360c15bdfc160c1b83e9a7eedd7f72da412 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98ab6e9b1b7aab915160ae2b1951ce5e327001234dc9e1ca639edec73a49e1b +size 57009 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b915872a4fc82287b78095d61f0458d3cf97b11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de67557a1126182f28209e221a8a6a3be8f5aa63133ac070cab113e28d7cf8b5 +size 58290 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed954a9faa5a74e76939b23095657be57c4d2f0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2974bca6839948a4257831637783085f19a6fb1b94663e4f773f9c55694c65d +size 42734 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0c9d1f0f14fc2657f00d16dedecd7d2d527cdba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0defecf50e6d299bbed45a75e73ee04981586aeb341b7711d8f91006356f7b57 +size 143737 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f4bc8e4a6e304a128878f8fe80141d2181ace6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652d6111d92c54828fdc0950aed4699196594c82fda50351150f85b3cb2ea92b +size 70911 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ad5c91128ce9fbd5dd2762fd20cd54f1cd0ac08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ae889c0f04fd6bd10ebd25792877ebf4dffe687a64f23d9b808addf831eeef +size 161831 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02ec2b867a594c9ff09893c79b2bafbfd0773789 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eeced4484f8b07c645119b8b241517d6161405b9a287aaa0acae4976cdf43a4 +size 209308 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a02bc704fc28e01540b477e144a200cf295ca90b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7990c20e9dff6ee55f4dad5651dfcdee2f882ac15ec8480e1e2dd62d8daf2166 +size 49768 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a55e3b92062cb93390695b40308ebb2f3429234e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b81fca498e634adaf8b811104317e9de52737f3c569b16843ff9fcef0140d5c +size 31451 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66d146fb8dd1bf568ef73ffd049664bb42c713fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e9544a0fa9589eb46d31aa3178a9ba453b16a623e51b594da4dfdc95313e5ba +size 35996 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e986730267827fcdbe7c12768a65e368e78c1c71 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78033a6f31826be1d2464432667c420a3e87af34a1f9efa8e9e94cd2481e2697 +size 29400 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d0e70cd14c86e735f1d82d7217ee863b5c4380b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8e770d0b1a35616513f3c3d640540f6a191300e290f3a96d239a8b8e0a1cc08 +size 40221 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a61ec77cf5e2d32126380ec58788ea2095afcdf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e251ca4858fc7be7eac7633a8146b6f1ff2b124b08d6c615bf249462db47746a +size 26904 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a994860e3ab4a74bf48c32da123e57e56675292 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec32680e8f43f7272c5ae8d8904bc7d677435f336b0898199eeca1c02d4e52d +size 21655 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65892ede35a7547b4624a91d7364482ed3c3c0dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:344eb6420a180e1725b8cd2cd7775bc4b2793d47db0e2272f19cf7da4c295ab7 +size 57370 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a533fd5d1f36d82e7ea485cf50173d9ca64ebd0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677162410048406a0a6d7f3d6f401c3dcaaf45b86bc7161ad452ed3555be7a32 +size 22239 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b0116e4369654bc0d3ee8e00a065adc1444fb0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67197540d3e43fa63d3825487f787cd1f62e72e653a3a4e9e409a4040a7f617 +size 168965 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6e4be3e8d39988a9c8813ea94e4c2c238f6943e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751f954884276f45d2fe78f27670ea32080a8981c6e271cf3755e3f80cceb715 +size 90028 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d3ef9cb17ce43e8e980c6e02b0f6d1cc8957861 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c249538caefe8928487128ecd954300019223ff49f4732484b293b3149c1f0cc +size 153472 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9290c6d38782e4e560e7709380fc7c5dbaa5a034 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7658558c575718065abc97faf77d8b395fb90859e1cc237dcdee66b69ce2e26 +size 79224 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22d83406e796d00b895add011d5891556cd16026 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e93e94069596531c012ba7dc976f7a5828ec358e644a52864f503498151a3a1 +size 74147 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..545346192b2df1d5d2b0558873314c8329e91f9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3867a43bca9e47baa9d9a490ba9af0b6b22abdfb96179b10a0d4029355235946 +size 82438 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15bc8b257cee861070e59726c3f11cded265d391 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a74298a9403be929c35cf7be909aff6e227c6d24a0470a586b1798d8670e98d +size 89635 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99cd11142f8ab6db3ccb9d3bc5b4312fb71100bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e789bc1603d4f39fa30d5341101fc694afa213a2585daf0777a1ad21a6ed6d34 +size 1002983 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60f81f98dcdf542eed6410b32213df2900ee20dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2abf959fb0efcdc1857cf0a3ab28b0d6eadad10539aeed0a906c3bc0b11f216 +size 132618 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99ca27fbeeb806b2b07f75f5ac7e95d4f0942aec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993d68d22be846546cbae3b3b94fc3aeea4a1ed1f27bfde665f639cf4e92198d +size 182536 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f08d70d5a8f1dc0f6d1af19944af1b5eef9f8374 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba7fc1984b4b2557c219d091affd1c7d1078baf562fd4e443bfef7da1f00b4a8 +size 27445 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b14b23c2fa5ccb43f5de32883e1b3cedd2cdae5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23df71b2d5f03f0545cc66a1dd89be3d480744c0379e45cf3b76c2b9f9bfd4e2 +size 120111 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edb348c4cbb71c84477e5d58f693d14cfc688926 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115340a8e0b210e0568da578bfa1253ce6a1fb832494c20317d88cc0cfec51de +size 57974 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc5cbbf794eb8fc6a546a1861b55ffa0c10c9810 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70a308773d040f0e8eccdb96e332545a450b87fead93b1ffd4122e4e3ca001dd +size 25409 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdbb8866e1045d2f33d71e7a25dfe1ad0206f47b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19459a31b0e454fd67ad36d015c7f2f308c30a43d370fe8bb111eb71562f0920 +size 39323 diff --git a/eval-results/mmlu/0/ckpt_159/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_159/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7dd21d41a9fb9ed1c539adf1dff704cbd711826 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1453b6728518384e5cd51db888c55e0accb73d63633e9dc70a67ace94a2a71 +size 32910 diff --git a/eval-results/mmlu/0/ckpt_159/results.json.tar.gz b/eval-results/mmlu/0/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6c111439f00dfa3b0afb845f157acdbd9b0b72f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064aaa838b82796a2ef6bc46bc675b2d4e095aa5b84e475906b53df253ffc522 +size 7581 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b65efb272b23e5d8939677b986b0116e8968fbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9c3ae7f0e84c70f9b1e3e35b78627c71f1fe71d6526add4b57dafb1fe3df163 +size 17010 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59d6198f28cf02cb5e25098516b3224b9785f452 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3489f4485d8113e4f977e8207df4f7869a4bb8650cc6ea34fb7ce4fb8035d15d +size 29795 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a34641ed9d616020691f77bb5f5ad63d5537c43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e398bbc6543e445762f96fdee6d6b16cc57a07c0fe0a3d82f1b870e03cc28cc3 +size 39835 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eeddad4ed3f3e0647ac9c5d0daf1a8e8939efb6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10cd6c09045844d7252d04eb241212b3086c33362268ed97b3d28e5c4424facc +size 26739 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1089e6d0df76070f472fb7a55100e016c04cf1ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00d259175134d41a00b2fc2d4d456995b884b7772a7986a8b552ee8c5701de64 +size 61150 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f921ad1a8f574bb5203ecdbd5d96eff7f6d4abc8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e8f6573ee4d9f860e8275c093b7fb4d25ab23a51dce105cfceed5cb24ccef5 +size 40309 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5af5d03558111c7fa1f91e7c5533c64b9468e58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:568ae590d1670c17ec0186f60b1fe4eb82d5e59bd05292da18d72a54cf108856 +size 23720 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96eaf8eb2588824914ded94a5029f52b1cbc0852 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e0ef919bd8ffedcfa89ffcd87689f60a565367c5ca6d06e4bd37badc09646a6 +size 31025 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..226ba451133c4ba2a3f45fee1e7dbbbd28149160 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955870c9c6c1df7c5f13034bbf24fb5df438c77988da5bb62dd9ef8ea8861344 +size 22914 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8952ba7a46f855172617fef3fd6122d117d9e9b2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bd5b8cef8ab413c9718544387b3c3bb318483a387243d39baff304318b10316 +size 60882 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56e2e6f8a2a33105fccab7debbc2e9921fcd6762 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba344c1836d2d5ca8fab5c6b65ed6b3f9c40519c2bb9deb7310c1591c46a5303 +size 25683 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62882b8c0099756ca1e3d5d8a7c0dbc8cb24bdf6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4efc34f1e58fe8ff28e1b7ae0fc1a6f8979750918702a9f69ca6d686aec2c2f0 +size 25762 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7eb59107b47eb6bb6f159e31dd8fbbbb29065ea6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0754e26d3c6f6cdba75c6e4efb227de22ecaf559bee3a97f6bfe7a54b0212378 +size 46446 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69b9c865b679e65804aa7017be61661aea4c5153 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93be0de36547834130f194e9a58bb1b343e01208658dd25fd4aa1e6d7604084 +size 31419 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f1381cba2fbafb673310691627094e94dacc9c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5330d7199c79e9155c4d1b70cbfa871be2286903283ab34e159ede806367eb3 +size 28735 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0742bfa3e5f006e70b5ec9516439d1e2e7583b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04543918f4899b5342b6b1385a21417c3a3a972d0da514fc4655de54407d100a +size 74332 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67d120998e3d30bb42199a0f83e7d9e532e2e7ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524dc9ad1838c7025221097987a51f97f7bcef2da0feecf2694a63388de48db1 +size 30017 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df48df3e3a23a2988ee9b5c58f141b14bfcfb411 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d85ce3ef2b754692e51564b03308c4fd749ac7ba4c8759b998c4c9a5377be9c +size 19048 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..216fd01bae447fffe0bef14d31b7c8db71d745f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3261aac51438743d419d4c18ba2b6c130121b88520b11e0d4f07487d29dabef9 +size 87867 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a913ffe1be96c5f0f1482da8ecff0770003fa60f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244ec23539322791ea4745b0adc4a394a314daed2ac6c98b9d23b6f84c307508 +size 49848 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f0a9c4e633e3c541f7b2fb57cc5e7bb54db79c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16dc1ef6dab28e8451e3548feb8aa1952062282c909545186148cce3f02a11d3 +size 31586 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9516b84162924cbeb2ab1d602cd58d87bc706ce7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a07b1efd0fb03e0ec486d89e5333ef776e190109502ddf2bf898939e56a1d47 +size 145376 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c6cc212b7b1279e6da274a416bb5a1ab2150cdb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56e91affb85dd82e70a4636f1a227402770128be477c4408eabc08b927cae8e3 +size 44538 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4e0a2e0a8014d1193b2b255e7587c57ab8e7f52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6ac7d78b12b8d32b6da8c98d2d59212d9cd3c3fb124226de4491a56ecd001b +size 54388 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c1c76be08397bae99debeff2aa001552aacf443 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bafa89c511ef587b647511f625174b0916192789349cb242b8de8778bb3e881 +size 92629 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd176674173dfd5f7fcefe278fb10777593272a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:995aee9f7fa1d812a1e00bc7d300335155169c70bcb031c67f81e4c681f04e82 +size 56912 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31559afa4ffbd9674697e88a1fe48697ce7a4dfe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e59cf64a24b1a6dd820aad82afd8b69ffb56c466902b024fa6e7def032a256d +size 58376 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95cc12f54bb6b535faf873687aaf2ff1357e0240 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32798b6f430f37d228b3fca747d2863cd0ad49792e3b2f59dcb4dd49dc5de93f +size 42791 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..711825453e1d50d09ba0662222d2577412e1906d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8093b2722623d823c3234e2cb1c53e92d18430f61cae6bdf0aa65c3eff2a0e4 +size 143872 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..247336c7c7238dd0f751005c39ff78472818f362 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0602c8a4ae8ff8925c869424d429eaee3125a21b42e9a6586c51727eb881a73 +size 71000 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a4b9db9d1fb5da68236e7d30c4553401064ed81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b12bf8476947e0bcc4134d7250a8934b3680bde99f5fa7608eb84f2058e31448 +size 161936 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ef8ef3bf039d2cbfc31dc9f1722daeb728741a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dcb0bb7ad33ee58b0e9a4960f31b955c7f2bd65c5415db071045476dd111ebf +size 209309 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92e7c4095714a338e9ee5a145cdb9e45a94f5007 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371fb2e7a526de93013eef561f006c217d8948193116c023bfd152510a0a40df +size 49831 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc1ee907df30d45b437f43e691b30018a87aeb90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c1ae27a792023d9ecae95e8e9751c9b2f5efa5eebc3d4d61d38cad1fc796c49 +size 31494 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ab83b48c9351dd9ae52448641765a2df4144ddd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c403b98c3199b588cfbf356fe9250f75797d624d31bcbc84b959d17d439fa35f +size 35944 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..773ed362055125f4bd702a1d16f1e0b09fe1297d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d7ed83479090380cdbcd3dbeb30fd32070a809bf2a4e58971c15d103e9c5b85 +size 29437 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d6d653a205c0553c567335a032fc81faa89a97a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ed3c62189dff6c464037dcf9d87c403184f84bf6a7d35f58f0fd263d0f7962 +size 40248 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7dfcbb0057f12dc12ecdb62a6d7df82cefc1d732 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eacaa5b7bdb0e17ed7733a1eb15aeaac14b1fa4ded1279e8e011c84bce7ae8a8 +size 26980 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..990272e98e17e8d2188f765617caa488b5ffa36d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca06b7ae48514868008fdbd587331d27c2df52da98baf5c575921a42c069634 +size 21674 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5369466beace66466487cfd46e6f36c14484f2b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a5317937731a96aac45fa693f9e05406d5ac92084d0904d662554768d2d038f +size 57384 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27e9520d4cfd842a7b2511d0c0deceb9edd4aff3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3ebbc2be34ad69f193bcaf942c2ab1dca3bfad752ada58b9375b50242adf28 +size 22233 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7aa220e3460ad8760492ef3087fc29b0b5b49997 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a427f29d399b18958aac1cf8ab9c3bfabe524126877c8c6887ebb3a0def93f +size 169023 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce5e62b11ab23d41d68e0aec1eccedf2af628c38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881876a892139aa9f7e59314d990db4645956e31a61df2c92a817c0b6b467c63 +size 90091 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96052c7930b69fcf5e3f165291c8f50f1f5d16c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62886501c45880d3d143cf2e4ac5e9434063c0c1aeb4f6ef3863128995dcf6c7 +size 153330 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c229923d69df76b69fcc5c4bb2ffd333f96a7fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:097226745075b8c8941952e751f58c5a757715199eda049f6d602656deab62db +size 79263 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c99847ead21d31aa3b773f83ae1e69a3fbadffd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee54a3d1b4619ded577d2312040edf9ddf3ac52db4ba07a4b02f57832afbc263 +size 74247 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c6ca7e86d683f4eda5d376ac16a2e2aaadd6b97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bce76e31a62f00ba1aab6031b534c69b3c78322dc5c8b3d08211a82638e044c +size 82449 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..288ee623cb3f02578e76e011dcd1b43dd26d0c58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bbae3db6747b7de7ff764b6b3018d26381930aa2699d049a178230f70c65088 +size 89776 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff754e78f5892acda6871b37d815dde14cd5ff49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466ee86dc90197b6b9e9bf92cb1413bbf824f632d00fadd080024a546185dad8 +size 1003739 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..671ebcd0153d88070df8d26e081e61ca94709b9b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a39da6413d5cd35f57f3748095bad30861f665592debd65efdff8d328468651 +size 132783 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e73d715e6ead2eed56d01255988854a01ba80b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c22ccd32aa603a75d88526884d08c73618251fd001721e266fbf47516f98d3c +size 182790 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c58ff8b80fd6e30effcc9de884969ed721bc515e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b2842a61825b5834373a9f70725650833d4435b551190ab23ec12c5d89dd2f2 +size 27527 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1876afe5f91a5fc5f6a124fdafc8ff2683990faa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef8e4d96d6a780dc861b5fcb5b0583eb504028ca28f4ebe713c506cd328b2b39 +size 120337 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..378faa07d4ee2da7d9660d40d5eda90fbacb0eb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0798a00214088c5e84b6df75302d7719e209871a138cdd4068bab7a27bb4affe +size 57979 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ecd22bda172e5ef94169398303628bced9f27ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14e6a3284cdaffb5dc2847ceb0b3b6c06e219cb0eef57d284802d54360a14c8 +size 25370 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b7b3133d1badf28073303bdafcef6b2d3e97f9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3656260b3a4b4bbad59c34603698fd49e0b7e892a5f1a3790bad00409c05feb6 +size 39415 diff --git a/eval-results/mmlu/0/ckpt_162/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_162/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04a016ebc0f7c7dcc14ef34b35cbbcb187865bb2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f767f94d383f7d04ee17399cf4a59e5b790073540f250a7e864007859967de82 +size 32899 diff --git a/eval-results/mmlu/0/ckpt_162/results.json.tar.gz b/eval-results/mmlu/0/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..105ce240b0c8fceabf36310b65bb212e6fb68904 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799d832f68dc83e60b5400d989bfe899667e24f94e852a8b04ae3fd2c766aa89 +size 7600 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4e7a317bc3176210abd4b52b5b6266764d27b68 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b7b5a4a50bb5b33587425a88cafa5e538e382ee889128f2f8503f4774aa4dea +size 17022 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..596be53d10eebd55d5645e2b36e6db030ea00666 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d363499cf90215fdd1251edef6034d558496b6e8e493ddebbc942b1fc69f6500 +size 29794 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1429a0f70e960f64ab78e2b9f92048d95cb3824 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5581f22fdef27f50a7f553556c3f7c5f8cb1cb1406e4a87937ce16b8f69d48 +size 39837 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cf71e517fbe5082f78f8a08377a63abae3c2dc2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04ebb4329db6943a29a584e6df27795d8a0e06398276bfa75f62d398f7056fb +size 26747 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f8ccea185f92153c73b5ae4ec43d52ea247dc84 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f883a9bae6200ed1e642e6f83fc2ff2ea44ae5d91ad8b31e40496e40005162f +size 61167 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50c75f1f8bded32d533329c1d8ecc8d714dbf94c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0891e220c7f3f05817a3c269314d2fea04e921586a078f256e9f48b0a11fdebb +size 40315 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0596a69104b9d3a114484b3605e1af465dfc156 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d999c3a2b6c77b08a816f4ba94cfe20d3e81b50d1a813fbe6841a6956299b58 +size 23746 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5e784c4082eff6ee71a30f3b25ee65b7bf3ee7e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4aad2bb36e49e9808af5c42c97f64e6b1400e25ba9a92f2fdf4e1a49d14813f +size 31044 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b30e469477c379bd28fd88c57cc430500d48d3d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fe326eb48030295696f28307a1b5d8503c63b80909404ba0330dd2aaeb6c30c +size 22950 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..626d1386f2754f9e1ee40c255f80f0d4e588e000 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f30ccada615453c6dd478c0fa4bc5db74b096571b4d67e792cdd775b226aafc +size 60870 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59d79531fc589efe59d061c1de8887280cdd4c14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc7e3458de1410edf20b1b9bb420da334d0974a480ea0523c35c65119145d1f +size 25687 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e1f2b304ad8cc8f049cb88eb36e5f98f3f13143 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e66f30b6e1093b37fa6157c448429be5b3e5a766c9e36136977f85bb6508995b +size 25784 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44aaf64d93d0ac78c7be4b53cce4ff9ed79909ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7f93042ca3c830e6681e70d7d515eea93d4665849c3291e59680c783a7fb17 +size 46438 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2b0b6cf17ee1680cc50bf576580bf9a385c954c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f1d33b45d9f2732d959d93311fac988dd2d3d241e1d247cc4c843e72ec6736f +size 31484 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37fd48e8f34ae6a7711535ed0bc726516d95b9df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ffe33443aff0134143906c120de84be53d50d25281c899e337b8a02ae83889d +size 28726 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4eceaf66e3d34403fc3014cb7b5546fa05f0497b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb516e69d679b0016f7006bb34b89ba0d716f81426f8284fda6a43c4d0881fcd +size 74430 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94d088dc9a789d149243eff8a4bad5418b042ef0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781f937b0f5fd044de847478489b9625abd24704ec198859cb8a8bfc286d3f0a +size 30043 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92dbfc5f9d197f86f1d94c0c0e117dbd540a77b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:367e9cdf560db7497d7baa75cd47e2e7ad13d736852fc226f9a674e0c3fbcf5f +size 19058 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17023d1e4f06fea1f2d1522357ab214d3436fd78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6bff4c239cac5f6516d0264671965a4a9313437367f09c7c0f8de6039f0d402 +size 87929 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a76d0d77ac7babbd338af4da516bc497b39d5cb7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b616dd7d45895ff617f340341d0ca898ef84cd5c486792b2ab0641988814727 +size 49891 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..193f774fc7f80c8d21200cd90dd45dce49cb4f54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952d34467d062247a2cdf5002dcaac7f71eb5e1c76332f71c4d6543e216788df +size 31556 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2537496e851f6052647240b41fa6679cfb2a21ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b518207d10c5ee9a174741c2c67bcaa9e02e2e802347d5c98f18a1dd7034f67 +size 145378 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c92a1fa4e29d9665415c5e8ee9a622e35b9ff5ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48907e5ff3982a85b56239eb5144627c37180d729a131754445c9bdd9a5c3c5a +size 44562 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc348d9bfc38534151e9e5be1f616a1185dcfed5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a442d181e162444c8e5f2bc8b8cd62048b9024ba8fa840df9552060e88f86ec9 +size 54446 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3875a9a9078f79324f9b2e0a6584a146cd429978 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c4bcdf8ddd115a6c3dbc65500a54a1d04e981492b64aff19bb5c2729f89408 +size 92618 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c14d7fe6aaae4f2db1094e46d4846998286b6c90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3d02ebf0139409512646c2757a2d171490feb535bc606c6c924f1c87441e28 +size 57047 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b88579d4557a82ec83c8e86fa7a23db9299b826d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a66eadfa43d92b8cfba6b878a6f53bf5157ff6002d56ba5a6d2e7c6261b02b5 +size 58404 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f28dedf3c53c96da6ae9f56f63e0c44bfe4b95b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:391ea08ec8bde987d94700092714b4e6e8a66e1fc2937e13e170eb91a7d30615 +size 42818 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ec0e4b666ba43f4c235a1ee2289c18bcbf2870e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f16202bbeb63aecb7872ba3399468e24da3c9ed5607b02f9f57bdd085c892ae +size 143786 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56c1e156f88fbf45c2963413522d4cec5a0f24f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544b2ca0e364239acdb4fd34562acbe92a6535fd8615f26d741f84fc5ea60b18 +size 71122 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d46ee2ed51be1e88b67bd50bb8e44449c652b567 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d765c77e766cda777946a1fdb6ff81a4dcaadd590c66918c54933560bfebaf +size 161979 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e00fddad31038acdfa31f5be37a882269e7e500b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb346f56b2ff7ac4c8ef150534f38e70b0a9e22e439ba0d9d5716cd54c9f617 +size 209397 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..416d31a53f965ec6d90c9415384e7c65307fe5d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7683ed0e637f1fc83009a0f88f1844cf09bf77df603ecd893ec62c09fccd66 +size 49889 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a477206b1cf0dcd1faf1f13e72dfc426eb530f51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ce8fef1a2cdb3ef17396acc1a2349e0cf4b413eed08a01f9f872d6db409cbb +size 31503 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e18ae54e27a361f2b31bf17fa98735aa1c217703 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a1da4410df019bc16ca8dad84de910469b71e73077ce8245721aa447feeb0d +size 36020 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43d7e3bddbe6a08a5f87407a015a82f93ab335fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0173bed1b1fe2d4f8efaeb237528c79d790ac5353076f98111a5f2bdad300a1a +size 29448 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26217d5c76cc327968802d9628673abeb56b2b7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99792c1326a931642350b735f8d941d54d613eb3d3c272e7a275a198663c1a65 +size 40330 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..432475c7d56da8772d50cba8f8d6fdcdad0ab13f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd38955de503e2c22468cf90fae544d712beb7c44dd7380158e012ff04a0ace +size 26955 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b78c62e3cf26b096b95bc557620c0fdf07b0926a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7597cccd6356d3e3e36583cbf7a18899ccbc1178ed0146610be7406b27277f +size 21674 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b7c962d9ce0a36a7ff0c9a8d1ea09f29927de96 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93577987984dabb642b5e818742105f7d9a36115f5a4b108772487ce58286bc +size 57396 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95e1469ef833fb3862d9d66956631983d3eb491b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4566fae61b811b146bec88051514c75a51ee9faaecc10029c1c4827410303258 +size 22236 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e2a1c4cbaf91062b2295144de035e45403e5d1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6b0c5ba8c2f5f7248ee40063bcc11c9df27ad557a9c9f630d3295b6d5921b4 +size 169209 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77ce28074ad6b20893af8fdd6fb4c22b70191146 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b58b01b56ecbca9a636c661b5c6fa7d3b904c0b9130e4687af83995a123626 +size 90218 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b8ab9d174d8b1e8716cec751efbf4b7fe3c6054 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3315ec10ceeba25b84966c03fdefa787f0116aecc45e14263dae49689d7c475a +size 153505 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b4b2ea7dcdc4c3b3ab15a4029f17b401a305f46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d02f416ecf2c646752f1b035d10a3fe8e21e9893b01cf1ca4581b1414fa26dce +size 79379 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba2d4ed97e1e904f83be4d1571f08996539cdb53 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3959a7cf0b9e08b183b4e28dd7c572db46dbb44a0e3bc0ef95bfb7f7624de67 +size 74255 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a727a6f8a284c3c1f430cf81e8b06291a71b683 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d2cc4f1618bb0f43160ed0c9d1069ac7602624988657abdd4066cbed794c260 +size 82524 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8555a8ce6eacd264a797cb19b9d860bf0d638a04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293813ececa12dc9904beeef0adfd88669d2316bf7ca609e5fde45fa9ed4bdb3 +size 89815 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1956698058336c77e9e87707bf052d973a1f0a4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad58142fd8852814f17b87d3024bcb6a7ebcc9630efa6899bf13e93ff09384b8 +size 1003551 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9f4bec598e83f793822901702e342691566de8d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d29e5d8d2c8c00107e8848aec424debfc964545464b35a13f2e474c20d44be57 +size 132800 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e4dfbd10ba1b5fd9e96a16b5b54c70d45af1173 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3521c1fcfc23022014d87656a5763953fa1d019b586da472458f32013fc047a +size 182851 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..597d2b68c1343126969a98aad2d2eef16df9924b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f250404d4c35a99ecefcf146d28810064d5c47a559eaf377ecf8f3aaab4841 +size 27518 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fd096df05c085729f2573739d25d9484b608bb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3fefe18ae6d5835b1221a2abb33d42e61912f1b3b21c1558e3b646bc108375b +size 120345 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b78a7c341059a4b71689c6dc98f0f5326c99782b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db98aaf9902c040f957ff0da0d53971806da203768753dc47679228063c22e3b +size 57956 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0007074c8f658f4d071a75d6c32efe00cfffea5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:817eb62c782ddafa479288ff76901b13be8e46fccfe45c4fc87892a99f7cdc56 +size 25417 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89518eef893e7846ffff9bcab31a2aa80719c082 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74cb51ee1194337cc54901738b292a55b8085388a5ab1e5554b9d5a669af99eb +size 39446 diff --git a/eval-results/mmlu/0/ckpt_165/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_165/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94d9e6f2bcd22648e0b5185797b7ce51aa06b99f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9388844e4b8e2c17e4d45f139182e8e7ee66731aee2acf24bf41b52b54960130 +size 32904 diff --git a/eval-results/mmlu/0/ckpt_165/results.json.tar.gz b/eval-results/mmlu/0/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..355cd70a232db317e6f834651d2a8d33bbbf833a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01360eba4ca7977d543d0800d72e1db6e5e4ea1bc1c052b675dbc23df8640d48 +size 7591 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2684687c7567139159ad30f9cfc2f618decb4674 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf3a590e5fe9e2b2d2f195f30dc18e70f3678be1be3500683d7a590c115d5c2 +size 17007 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6684cdb348c935d1a42d1f5a13ee9217c16a1796 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea65eff8c0df171c85cbe2ea03f21ad1548c31901b5fe7a2bdb11b35e56662a +size 29820 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e75e901ab636bb504e77a6d0f1cf5fe903d8890 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaecfb5b6769e176a3652c257b5dfdd4cc43982efa51fe078956726553f58b9b +size 39846 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c606850070637af3e3c8dea43c43ebd3aa2bc1de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b76b0ea95be81ec21b3d1bf0cec64d19aa6a7dc5dae1d3dd1e79ec7f6647b06 +size 26750 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b079ae8d421fbf930da09710343b02c66c03246d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5416d9e9084ae75981c5c9ca030457ceed87b650262e356e81d0176ed6906a83 +size 61199 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bc7ef261b74f14104af173edf13f3761e98ab9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b804c736a47c09d3ca55a014febb189d67fb9e2f4a071e7427bc72731985bd93 +size 40374 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2543b4b61a2ebb2727727b5789ec163d2b79b25b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f21c0365b4d41e78a833e11dd7a950386d53c4d23ad2137ff892dc088eb0296 +size 23762 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd89f9c9b0f944642b000e822687d9239fee5ed5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15da22929f21f2c8577a878271232c76a612f91b19e651a79dde96f212dd6b57 +size 31069 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6dfad4784bda4c378e6ba8d0520d36855c496c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ee08f78624f784232245d5c1f8427074c353cd8f24873a19637f79a50a69eb +size 22871 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d18a18c25c888387ac2143f0397e5343f90192a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a36ce4ea0bc7f7c1ed9c8d092d16f9360c6ed79d9d488a2099516b91a97b9b3 +size 60949 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60eae3345c079ef248e2552a587126fcbee48ff7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6943adc684b6c58b863a2bbf993bf66d040c265c03a664139668ccf0940f7c3d +size 25653 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..645b0c7a7cb252c7f69d551cf0b6816bc299eb84 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64e9232f7b10018aee3023f4fa96f54dae27c5d42f0fe55c002bcb142fca5282 +size 25791 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4a164eb2ff1e6592b27f4882edc2c0947d1f7ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f8f3aab0041097459dc965c66d3d026b67412554a684d8dd423462318c7e01a +size 46446 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1922f7d0b71243fbf3db6ae9c958162bc7c6bfe6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18d3a7442adc34125cb043d5cadad5e1e489290d52f3a5f5e5c1ecefb3179e9 +size 31432 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fa61e2737e7421edd86604016adb589a056970a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f31eaea38b1b1912c035e9190cf781cbc8f57cc6118408269ddcb5fd14a73e +size 28754 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..292c26effa6768173d6650c26a5dfe611f5826d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b395f38d58e3109a9782e33288c9df7c053cb55b4742430a60d4e5d6466402a +size 74520 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15e187122fed043586f05510a1edfde4548c60df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275cc2a271f1afd25b3bc3c02db5a1a7335273dd0fc30fb65c040a9c225deaca +size 30069 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3e4195a7ff943507574d4e036eae97832c0c64a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd1c6e89ab85cf4ee74c847c9866fc3efef25a3a668ff266d1f7f55170826f29 +size 19103 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..884c8215cba048065a1828a14a3b852411649b09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30d3fbcc572a1d913875a83ec6c000d107480f94da2104dfb5a1f0a470d1c32 +size 87909 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5b77bd20ac71d22d1a4795aedb2a620a62a4796 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c913db5edd78e4ea4b79e6fff9bc31cbb1392c5687f078ebdf41d8f05c788810 +size 49946 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffa98c4d8ddeb3f2dc40d34c8bb8c1753a83fcd0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81ea91a0155474950b149d80e6fba147b5be95b351af4bacd5f9afe1ae337972 +size 31563 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7afa578903373b6070532de2b20cabf739cb87af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c28cde544f2c82b8513cb5eb902ca0f5e8d48afb6fb2c554483f8f172ca61a +size 145455 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..114b408c6f197d448d759677a5c2a55369291ef9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e0450e9bcdb7b9df2c083c281e91d4c9fc4f061059d83cdd7b048cdf898b40 +size 44550 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9560659268780d972eac1682207f933e6377151e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:421e0b9fd668fb18a1007bda95662f8bbbf1e4f0864e8f54aec17d45ba9588ca +size 54373 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91f1b393fde4f7c29f30ad0f2188725ae04361bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be93fba97e4aba25cb871da225780e3506c6060137348ab6b6071ac1ee57f99c +size 92702 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf77bb0a809c4e0503cc9a069f26bbd7cad4b633 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41be78480867d96fcee563387628e4506ea19173da8918dbf5d191688e4ecbbb +size 57057 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c835a23bb6ed559c3bf2dd2825797802af995e5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5989cd4405e860773588fed1642a554c889945e308ec28bf9b904c49ebbe4c6 +size 58418 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08747f73f87b059a4e7633985bcd37f5a18ede8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a7efb3168fdfae498bd83d8b3158f1e5d6d5875bf1560065cb94e817f4a8904 +size 42767 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b21e0c397bdc61d40c8a3debc37fa2daba0aca9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9887013f333c51401f62bae85629bee6616feb701d860f715a0f4d90593e789 +size 143899 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b1bc3770189f3110aaee4a6bae91d77c0ab17b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c9996fb21db229a65da9b9f19f1d9bac99e359de646566a06adedcc4608079e +size 71129 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2e3f1179e621e1e171adf44be952ddbb057cd28 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207a8be1b53cd0b96790181f50cadd24e761da4ba929643fb4dfd0e4465ca049 +size 162052 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10d3082d3f0182e1e5ff1ab17378c84b30da75d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e1cd370532b6b2d0f97dfe6cf64ea187536eade41f2439ed15d7b505b50a67 +size 209598 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c48b77908ce35acbf40cd67a6a0818cfd99fe6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cebd25340caf3b3d8517fdf5873b194bd7d693135b262ecae24d344d4e6724db +size 49839 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6a9a9d7173736a32484f22161ca56c4529d37d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03fadd6bf4b2c541c505a464fed086974f96235ac6ad5255fe9e9510d9b152f3 +size 31521 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94686fe1be921955cd8efa7d887f2645aa9f79db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90da44eab5e5c03f4e12cc724e0fee0107d46329cfab636ceb590113e43bd10 +size 36013 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df739a95ef514ea51cc4f62dd9267d44e805d018 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33bd9382230878bc45a85e6e907a338e6d747c63bdb429134dc97bc433510d0b +size 29467 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e02db79915469d11cf4d0672a8332405fa85e2a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19275ad05a036f2570851c7750b7b7823ed463f642a55680b6b948eac0b42e63 +size 40351 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5146edd380df4dcf7bace81092fafe8a045dda19 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b08426ea6d5e570472dda248eddc598159e4b59b1a1157bd52d27a21866e280 +size 26981 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85a41aaa1998c1c4f01dd5913fcefb0ecb59495a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa68449d9a536a3097b2ae15ef04d6de460522a1f22a4d57099e6e43f7db81c +size 21738 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70ab1e62b8f2fbaeddd9459634e6135385eb99b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc0c958c68870a22199d352c7190b5f624272511e2782888b6cdcea053a2f50 +size 57392 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..960afe96846e97ef504faa9462aace592e3ed612 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a53db3726b0d3ba0d0b9cc48c7b733e6a533709e2ffd2941c4767ff80eb9946a +size 22290 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afc5e3622d43d9e86fc20cd5762b5d3213804df8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9abfadc475c4c132161ed9c8d5995001a380dd705b2b82f4ec443b44addf995 +size 169151 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b585edad9d4b31747e5f11352c184d863e8ab9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:176eb7349008cf58c87271080385af34f6ccae355c8f914d15c6da5640f6562c +size 90173 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73babed796c9ea67c8f584d10cffe290dfbe4f60 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:069a6a125dd9b0125f28ea8baeadceada22efefc1069b12c47a28d4e0655ccbd +size 153804 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24cae0381322580cd254dcb6ecfab0be41f591ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa0090357f6c40842ffde3615d047fdd9c6e4b5c16655890894dc5aa70452ec6 +size 79423 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e88ca83e12a381eef7fbf1ce9ed14d3eeabb86c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ea34ddc1ad4238bcd8fe6586a9eea84ba429ae3375cb29ba60db7d0b71ac62 +size 74279 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5938a7fbaf859219134e146264896fcb8027048 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f166e364a319a19c5bc40fbadf2151b1a718b8f8f40ca4d9e7a890672bef0be +size 82541 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d99fd999ac4544809e451fc85dd971c5789d5c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fbf684af962ab33c3de93993df24b7f1d49ff7adc78e45f53e7df01eaf3221d +size 89851 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6372bd0755bd06fcb22cffe26de01eec06b917be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9625bb6021e84fee52ac8c5586e1fd7f3d4f342864f29482e781bc21090c0b +size 1003802 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8970ae9a3bc1741a77df4af98c440d486e062360 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aff98e204305a968581ad6d94e12f847289e8287a5585112a18933306a5acde +size 132932 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25dfa14f5f584ebc04593712279e37c48f22b687 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb58cfdacfe6534cd1db13774de171b05df2b1998b2ac45ff7c57c1eb615e48 +size 182954 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6db05c792c8deac042c1a03add80fbde2377ecf3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6911f1a44c45e4c4ce2e62318034358c911cd5e5cc870dbf157dcb0ea2dc0 +size 27556 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a3be9e4e574d4d54888e993e02b969f9c11f4ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8121ad09a12545aa44c6a087c09e9a14c4752f5a72c9aaede3a9ff28bd6097e +size 120381 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc02787965d4f1f8491d9b9c854fe8a3b2ebda87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e05d812ebf71b03b371be9605cf64bb9dd3da4678e4bacf9697c873865d4c2f +size 58022 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a1ce529905991099d0f679446274a7325f4ade2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6278ed697cc65ca76949cd4534faf71529db5b8367dbc7ffb525a557b996052 +size 25412 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e81ef6ee25fc1f5e76cbe606e8123473fcb28ddd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab49294b033e845e770540192e7701062573ff572927f59e7024cf8fdab02c1f +size 39462 diff --git a/eval-results/mmlu/0/ckpt_168/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_168/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..114fe5cabf05cae1ed6b7bbd5d9fb70ae6214d24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf571d9ca95d452db6bfad8ade0e16c431c1350b2627c6b1fd12cd5c6f677d8 +size 32923 diff --git a/eval-results/mmlu/0/ckpt_168/results.json.tar.gz b/eval-results/mmlu/0/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3b9e7443cff5a87354baa43b6fbc791962bd51a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe87f5f1769c0adfb089bf597ab50e37a0559062c15534b0bf77b3695e1f48a5 +size 7595 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bb6feba25c39bb611ba4ad38d0a93c9b02152c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea13ecf51080d92eddf43981b660c25a1d4a385050a6c5d5deba095c3ce27af +size 17031 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d8f3d72449cfdfac1d68f4f18de3c822fc87c0a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b6bc1df8e08c44553493b91b63039e5e1e821c72b0ee197bda26b375f72726 +size 29812 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48a8232b935c605b690e1eeba79e3d5904849f14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057880c5bbe1e0dd08be8af142120301981d17e2f6d697d836856df24d2e4338 +size 39849 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d384685b4026521554d907ff66913d56afb5d4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b048ab7ff2f9f6f7d059489b327de3adf9474f30e1cfae38b22e60f19d161a55 +size 26744 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d04089454c53950087e438cf64517346714cb870 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518599dc81cf5c41c50a5a58ead583a3e1e23f39cda3ed9d23362f39faad3f41 +size 61190 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1546fdd8a786a6c96d49212bcb3d218081b080be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5713456aa259cb94f27cd0a66b57475721e44ac61da73da261e0a69f54f79a +size 40361 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b20c2aa75e11bf3ba38830f6a86ad35b4b91001 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ff586ef2d7f85bbf0d7f61c951969a2fa0113bc57d078dc84bcfd83aab4f5cb +size 23783 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d244854e3a694d5458a7c12555591cba673bbc8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9a08cd1d8188a243986cb59068ee6dbddfb897c30004f0555513034a110356 +size 31109 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd4fc42f7be6c1a29adc537503a93eaf31ba620e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eee6df025fa11c970c79cff78f9534fe29aaaa4314da98d365b9cba835fb2a8 +size 22894 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d6cc8e7988b24eec6bdbb862340ecb811b46dc4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379771304dff22c2a2084a4988f5c0d178f4da0e7d1e2c5586d2946d415dd176 +size 60954 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfe188c7847896b84525b59fd3a00237105f4022 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25017ba4ec6e7b76675c5320a74de6f4c7285d56c634b205b0933d85f3a5d3f0 +size 25703 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..603698ac4f2643106e564be68ffa07813c3b28f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d136cd5b302bab9bbe71dbb5bb5a590a0ae34ac1c85b26bb53364cea5dd527de +size 25760 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..558025f0756a0c9e360dfdd1ba4420662f3c4195 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc1c786bb532c57cfed9c84b6b75dde3e47b8a63c5d6c00e1f70eef751279b3 +size 46475 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e25f191bfc29083c6941c2b4c88e8d8a1169483f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27930e9f53356c198e6646c42e52893b122bc19334cbb82b5d069c99ab03f6a9 +size 31447 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..298f06fc0f32e8bdc5f1a21b44eb6cc50a736614 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52cac1a0dc9dd66f0f003524dd48e557bd185d0e4d0235b2bae699be4714a141 +size 28760 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bee50ac38dcc2c07784c9602c6cbcf7a725bff0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d3cc2bfe5e84d245366b8d69396bd2cfa9ae718e842c5ea0de456869f767d2 +size 74522 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ce9c0ddcfb8ef5fa01d95ca3c7c730da9fab7dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fe60a3a4acfff29bccd30a7674feeb8ace55feddcdc8b18528b59601672da9f +size 30090 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f366c0c7e2f97dd9c6f5c5d7817d4ba650d17c57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67caec9ff4244eb70e5e6c1f3bc92716fd36edc869555359f6f84323024234fd +size 19109 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85b1271db683747da58edbeaa08630314bd92433 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0826544208a3db0bbc26cc3fe3780352b0ce81d0e2fff5922f9268279d6afbbe +size 88020 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d089556d0eb0152243dd316127078c174bf0868e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0f8bfb91e7b2a30102c883cd89c2eb85c1922264e0573f3f7fb10649e886ae +size 49918 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff0b51264e190ae46a5175c0d21ea8c51f4ee387 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47d6ea4cd9f135f86de1a4dfe91a48b81191fa78a9d582a2a9fa80b523a869c6 +size 31591 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..184a1fdbf35d4478ef0aaa09d3ad0b99e4207b89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a5546bd4dd753f693f66b34b738b4c4b10a24a06b2e7f950b1b04a99aac4db +size 145473 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4dbc56dcb995382d1ce24df12b992ba5dc26503 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f01732ad45af75f8ddc075add103156436c72ae685ea25ddb06ee955ccbf4e +size 44560 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b5a50357a6832a6f17d0da9ef932f48f58586cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43582d8e66118544ee5a3c116b084676b446f9951c4a07c0ea85467e0dada91e +size 54401 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff4fcbe4bf0703d5e32f6ff90891aeb76da2a3e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7769748cd4a2f6c24dc557539da8c1ac5387a5f8b35f3ae6229a7aac12ceff4 +size 92684 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa6e4610457f96192f4f8ad35b932ce82e76809d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd33f6bf623729c070619abae91a742d0014b6ed5e76de51fb301b963a7b8f2 +size 57006 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e0c8d93cf1c6f765c8af4a8b07238bc6cdc8fd5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:051c3b51256c3237a22c800c31fdfa13f20df0a78faa65756f682cf1818fdb7c +size 58423 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..584367b934b51bd8fe47b0e98a069f4162e79748 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2ee277997ee56f4a3b4f3a8765b293039e74123929d7766ba89ca6aec76d95 +size 42776 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dff220db36a46a77c429be006fc75db1cf3754a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3777f409bfe08a678a95c9b2ada6d5eef7089c1e3888ee42fbee962f71573ecf +size 143863 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ec546582138f7170ea1ed26c14d16914cbc72de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81562900296f3658567ace42cfc67ef96583d50511b9104e04cdfd915457c03 +size 71118 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf38120690cfb1286ec7faceb33eee0fc112faaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1488ae4f8b31e15f1228d418c12dd3447af9b60f69be3c47fdb748953dc60e70 +size 162049 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76ab7cdbbc33c3429db84afdfedcf7198c732a1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3629488210f4c374adf8cf3c5f1256ce442a356f805c8d7aa4f38d73413741c4 +size 209472 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30ac3c645bf76c083922a2433add912fa1e93ee0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c6bda4e2357ea493e7a2ffb73a0418427f2e273d6fd8922fa2e7341c105325 +size 49905 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d91f625821f2a65cd8dee997027444f275b3d98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81232a734daf63de1bbfa31c75ca55d687c5f0917f98697b34ea9c57366f66d3 +size 31537 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..818863220b600c8a1882b3fd2a7e1ace3691252c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f92b6a9425e3b23cb3fe4412d51a6427a5982088447bb02f941644e37fe774ef +size 36072 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43b2993e1a8be74d8ef1fa04af487fb05b5a1bd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84ace490a82858900084f3ff7f8161fb545ed5ff6dac7c3e742cdf5e99e17580 +size 29531 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75d00f11cc76f191fcdeaf82657d84c43693e46f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7fad404885ef1c8c41b8dbaf76e3fbd63491b6600961e626eb8c23c7e42d741 +size 40298 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e734ca947b76c062860a832f005731b56882d6e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e30f2c0d68889aca0a6f64352ccb51c2c1b1a6586ffeaf26ad78283e9d4703c +size 27000 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea8dbd8dedf65b27e62640207b8ddd5e121e1066 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f0eeb06c4877ce9986ec3e33979949618a7ce60f4325f5a9e0885af2185624c +size 21712 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4c4dc76fa82c3799e21369f757ea4ea2aad4d4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6fd5a09d8ba5b4af5cc8b4e9aa3bf9d2dbf17f1ff128b3490a31b15ca805674 +size 57440 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7400894a9f6e0839236d26cd922dd3d20580736 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebdaea395f00a4603f3ac78216c88fc7fe1bdd8a822596878b6ad8101b749d4a +size 22284 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb09d62eb49177f3f2a27ac4439db14f9765804a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951304047663675a328b324e9af0a23ae2d1d426f4f4ae8c7dc4c3e307dc9583 +size 169294 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fa32f2ce18c3864d31c97ad597702d82de8f07c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b4c617024afb5681818123f7cba8f4f8b77de3a7e55e286105b72133f231e2 +size 90153 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaa279c603a32d8aa88b90ca546b6f59835b9097 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1bbccc4de47aa6a83551cdd672fc84b9b372cfdd6a86510b84301e28aab0d3 +size 153799 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e054b67699bf028f86580ed7a002ab7dd755996 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad4294cb8b7f8d9505d632cd689e0ce63590c0e785316bfae92b89f3af3eed4e +size 79404 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..000c689028a8a8de31b5224f1a8b966ac6844956 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb3a192df166a32818314dc83364a8dafe6a98193418922992caeb0eb930fa0d +size 74276 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7da16a6aab77727c63f06f49d95a303b386bc6b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836cddfda9034fa3e771429ef8a2be39049eb5c7290159f58f1c914ac9d6b3e8 +size 82468 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8972f24ed36d6323a94b45c92a4573aed2d46543 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4ce3371a5e504574763c6a07796155bc1930a44425c781165732bc295158ecc +size 89887 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7720b2f577056a07874cbeb86ad9c84a9716eb62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48810e91ba10c225bfe483e26a9efa200be5ddbbe80b44f6ae2297a4e40ad93 +size 1004180 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b332694e7ae1ffa81a1b3e52bd60bf64b50f8e80 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a76d3d21c2051f3cde948cd7263d2c4adf0b6ed5e0767ea87086d586812f86 +size 132998 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3657a69f8a8547389ffbcff0d3095475a3464146 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5eab8903460f9cac927d44f88b513be41333262be4a63532a5746ae922c2141 +size 183002 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69302c4634c2fac6444bc2464fa05669468fe11b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4c7a8c1f6853ed466e3d36fb324e1503901f81e9d9b3157fec4ec83fe9daf9 +size 27559 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adc57ddb187deb87694d0c039674a04cb9b35263 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aaacf05c570d772909d5c10aa15f0f2cbc767cda11d7333192503db2f65b528 +size 120421 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7abd68fdd832cccd5ba08ecd8ad79186954fceb2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5880248c92ffbb8a80efea9cd3469b8fa8605f8a345bbf4c9a5c8557c75323e8 +size 58046 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59916ab88ca087ebd3758a3e47eca53ff20cbc6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e810f151b49c2767061ab871241b1aa1298a55f6089f411510a06932f8d6c8bb +size 25442 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1b0b6e0f7b10bec5ae2b213c5fb9b2f4e0df887 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c41cb3b4754fa41b5ac625422d29d8b439753996848d4255e399845261ca5c +size 39408 diff --git a/eval-results/mmlu/0/ckpt_171/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_171/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..637079d1f702278ea30e8a57b55c386389219261 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424ba4ba7ddac4b71ccc4feadf395bb571cb3641f4e08ded966513be964a83a8 +size 32962 diff --git a/eval-results/mmlu/0/ckpt_171/results.json.tar.gz b/eval-results/mmlu/0/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d5ca8997579aa26f221a70a2d24a4ba533003ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c91ffc1840a580cbeaa61ce94dc34d2b2782f237be4fe76db2753e12e335a969 +size 7604 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3af3789262e511a88feee57cb945e4899b2e6765 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d54494270e0856a5a524b8f0cfdbf20c1fea11c5b1db9045079b39a08ac507 +size 16989 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af43c31ac8967578142d75bc897099f73b722f1d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc18e5e5c3797560e20c17b775a51e5b7a6b0f1d3f46c6207c757533e1992dd +size 29760 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d21ca17347e2f89b9652a73794642d95ff43b2f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3d01563ec4a8e0c7240b05b2746fa8d8f0a156a7ae243450c1904b2f18078d +size 39776 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26ebb7832b55ccacebb8ec50c74f0bb46ad08703 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ec189b36c12cebf3f05bc299039ee2ffbb66337111da58fc4732c91b3d39eb +size 26736 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b21b61d54820e24d4d849790b8b626e391f6cdc2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3011e91015b05ab1699f5a93b7ea440142b54e933420d4859ddb93a7cdac0d3 +size 61167 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7389af2e23926fbf2a095c5a15d54abf1c3320e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30b1241a4099e1166fdaa5809b4c5da33aabb4bb9a9bbbe7b5b576b6af7d34b1 +size 40340 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bc05b264351a7673591dc130a319a206fca9106 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea00181c5ce1bcfb00cf0b7fb679a308ef86877ee4c36cd241c3b1cdeef83a8 +size 23721 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f20cab5e48e36b929bce7976ce8de1d12911cbb5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6772484f5fb6b52791bf5e10d1e46b44811383387caad0b2c1df666714ea2254 +size 31055 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cd97cf7023868da11affb6bbc09e5010e765e43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c199b31de3551c456e9d808c4beabcec9644c5ff44bd2e81aca2e28395e238f1 +size 22867 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..150a256d924e53855b2e523634f6cba6ac7b6e1e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b10095d88e266bca8a3dcc170ff0b852b751ddb117c61b49e7a40ae67c8937f3 +size 60857 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37fdc77087477b0fe1f48cc9df50f97a0e23496f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa1f7c5b39173f383f1a56877150bd5b845c9df649246e7554d5e29fd4d8d98e +size 25648 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a22d349ef8df59004d61c00245a449d6dfff75 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea1f7d8f83ac55b07b9bda4b403786554329181d2a820e2982c93d055b57049 +size 25789 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9462780f2edc4f21aac35cfda7184f859d44bfd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a6ed95859ec7a928f2574b1b8e63bfdca9143d9220a13bdbd6a804a9fccbd1 +size 46376 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..455589787f927a8e2d77dc63e783f75135c0e2e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afd3e1d442a95dcb719e4a89dd003a4b62bb67765f85ba1ce035be0e31855865 +size 31423 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0656e3b9afccbc59be6c0385eb9f88318ad6c8fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13742c3f8f4ffc956d569aac8ff8930ec6a448faa129094a3d19ba629627e4ad +size 28712 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e62b4a4d53ae997ff44f03ef7012de5562544868 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e288091987266a1035eaf1716d300c55e28f96985d6d47e4a672d3c86de094b9 +size 74344 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..054e1f0c6c0bdffae98c868c0206b126fdaab166 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f839d8b4141012518c2bf449277dea506be129a010b3d2beb62adf69d0eb368a +size 30006 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd04512a0676906771e0c41198e6e21ccbcb57d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1c0957fbcbb45a9285f87375f946cd147738b8b7dfd5e19c563fc0f0573ba2e +size 19072 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51b602a4066c5bac74e0cfa6e307e48e32465036 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae5826dadc7ee87830405ee5cfed955fc6aa368ad5b0445c54a64e6ca44c26c +size 87922 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04cc0d00284820334107616fe6874db255eb835e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b396ebda08fa5b584228d94c6594e905a5545d62cf713ad1ae7986a56ee9d29 +size 49845 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..594875e64a8beb5ba14ce7a6339bd6c8cb9618db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba56c376873778493bdc12ae40196b17920b7b2657b3ea1a3632c46796d11022 +size 31573 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae05d41a98aa7ac583e6f12872faefd67d8c7431 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:443586d6e549a99d6ad9a9d7e871bf254f911e54773960b8a90b98af1fbbd643 +size 145386 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4b290b0bff4b8118ed76caf3f204ad0e5edc20c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adc065a055486c918567e1e9e807cacc603d1fb59413be9d0b39555b7e3a4832 +size 44566 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84696bbe27dab3dfe357faa67fccbeec6ec6b473 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:514b372ee2c73a16a78bc26115a3d4beb9c019b2fb5c07ae40f6cef5858aa062 +size 54396 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..964d12e3fe133ce86a8f2eefb9f02658a86accfd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:687838f8d0c9f2570f43c786c47e40512f57186f59f8c65dbaf49c10d489799e +size 92616 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..049acce77f937039be9ddd9b197cddf957bbe19d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b76a2d62f0651accf2b8266376f36640db8e6b21888138fef09668f1de46cf4 +size 56988 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7fc7cf2588765f310b20ab7c2d0f0d6f3dae7e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0708ba79daad4490dcaf19d250fc472e999ea7bf7721fa731169a2a513ae771b +size 58336 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee4048c6dee1ea344a629e8cf7b40945ef781874 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e41c588c52046fe1eb7509ae6a29436b13e4d0d8b855c8307dc548bbab439aa +size 42809 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc32fa50356ba2743486afb5bc04889b1c512044 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e107413c54cd06dde24734ea0a0f4b040e86b6e2af44a72d049ad9047485748 +size 143764 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..417428676db6a76ab471a282a694826fb1fccbe1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4775812769175b22f057d02a9dece849d06a42e911cf54f22d201000af7677d +size 71028 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd2e0abe6a5166b03df5fec5c7d95c6ed1fde518 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b739dd38a90c14f7730501015c9fd2c744effe20624237549cbb1af3403aebba +size 162009 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc04443bfd1ca89eadee1682cb232a22ff71b356 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6da15046ba6b3690889fa1d28d73f833a97fb17bcfad489aad2cc0213afb82 +size 209352 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a229e433c56b54918213e219a4138d88a66e10b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79b46553c606692cc4599a8b105d32c59484e074f6d0ae45375fa44200299e2 +size 49777 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6381e2234d780243f99dbfc7c3024fbbcc04c4f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96819f54522038973d6ea77cb8a1935f368eca9febef9cd7b029fb580532051b +size 31498 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7f1a60cf0ad7d6a0b8f64bc2ef2330202796a4d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086cc1d5fd06c54bb4f8b1afa75b0453d2eeab9489ae208f9a5bf9da73ac0011 +size 35993 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ee3201571cbf480c862fd75ce0228bb219c4466 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502493b2970d4e67483cc420fcec1d0e90ee5551585b7103645b43132a911744 +size 29436 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6b277b6713998c6574eb25f868906e72b29b8c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0983b0ff75af9573400cbee456fa98170ed8afbf02d1e117a9cf51a07f7fed16 +size 40244 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2bcfce3c0949482dd5ecb2e06cad1dcbb235f124 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a437c0a17791275583242e669cbfe156abf32dc4fee076717c9267d8213d2aa2 +size 27011 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04b3bb5bbc72c9fb29f2f046ba2c5f36c97b4cda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89df807570b03d5afc6743aa6f12aa5db5c94e628db14a5929296d1e5d37468f +size 21675 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..720ae929247ffb0d63c8506c3ad8e060a3917690 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b007efc68a450f966325fff70dd6974608f33db23a58cf2756fd8464ff45d71f +size 57422 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c54b2f31db5831266d1307486e3ba56f8114139 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf84afefa7b8f7dc9095017a14d6b6884b9640b28c06c15b4d8cb1786c8d72ac +size 22265 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd7f50836a827953431607474e1b7f84cd1e0e68 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ab2e6f3cbf8cdea8675ede43c4201036cde17ba9e8630d122174d5ca2ed578 +size 169060 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cf0272c424bfb2ded15f3e5ee81d581f2b7e130 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:706369370ec45293374ae041a62bb5a877c5b8ccc1cdcfdffb0aa0506b69fb60 +size 90051 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9308cf1bbff3d2a07a2762c9914c408ec743a329 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:239e64c1bf9246d1832a496e9150e062091c86d4cb21609da8eb4549abe5c7e8 +size 153748 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..783a269fd803ef112cead6c4390eaf79a61fe079 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4733feea5640f0d4cce74cbdacca04f7dee1bc2e058eda9d6361f3a6ad98c5d9 +size 79386 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d7317f396bdf8196d84ae06861c8d57e07ee326 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7165133d86645754da70639c6ed16b19cbd02cbc5fd80228a45a0b2520b52735 +size 74247 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c62d8cdc502dc65b716a47d952f9c4a627ef99ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08ec6d3a683be559190ef6937616f91826493110777cd9cb44eb192540a7c573 +size 82467 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b847e9f02a1161f3c3a63d9a16b8dea643876806 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ce44403cefc1e2e770a28008975fa389bac38649797fe03254260758f3f30ea +size 89768 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c149ef5c892f18479b2c29452b9de06c90881a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e42536bd9b29db4090950e35141c6c777c4703860217e99c03954437077b56 +size 1003266 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d55234b721c489f5e94ad889dfb6793abc76867 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a314b30140c57261b513fd0bc26717d00714e0888ad4fb55e7f011e1f28c25d +size 132933 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5eaaa7232b6a1fd59d3b79e079a491f0267d0544 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d478f34a38f6042fa5866616f9e2d00fc3496978862e558c573950f59330f5c4 +size 182747 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c83ddff0bfa844fc42a1aaba4f71aa376c6a3f73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7e2f65aa11e8456c6830dce12f7c575c89cd09d84e84085b574c61ccc2076c +size 27514 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e64c1aeb6822f29acb34970b1022cdfc0464c8db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89904b3b9f83096d29519cf8bd972e541ba4e78801ceb552b98612e5ed44145d +size 120271 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02cbda1fc7b0d272c27a25d0b447a49b43128123 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8aa4bae06b687e2fb4e0af6f7f07233eca9550fef1a40b0c2bd8be0de2ff054 +size 57984 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eedd7c79e025dbba6be4a604710ba6c76492f2c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72700a63d39a1b2e14ea15692d5b1726d5184a2e4ddddf81d78f4ca5b23f7a2 +size 25413 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfaeea24ff33b898e07e299ac5eb65fad637deb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f238c753a8b349ffb010f16b83f0b30a0c12f7958445c575ed22bbc11a41c376 +size 39394 diff --git a/eval-results/mmlu/0/ckpt_174/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_174/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..283a46fed810ab10cada57e029014e0202bfbee8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66049bc65db2eb384570199ba363b60b78f340b12d6585f78bbdb179decf082f +size 32933 diff --git a/eval-results/mmlu/0/ckpt_174/results.json.tar.gz b/eval-results/mmlu/0/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd10c8588d9547f0ec7e54779ab9431251e1803c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9d57fc3e6a18db144024797c1d143b5ed9d6d9025fb722785ba14a627cabdd +size 7626 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e01b16aa6cf9c0441714232172e87333950496c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d46f2dda3b6f2ed122a14fec2cb66d2d6e95ee14a790dd71bbc7b39c6be168b +size 16964 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a279eb27bd9521f3419db7aa86608f1c167a4935 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0adc59cfb2084bd85b01be37aee1f303962fddaebc3ae4cc08b049c38a63aa10 +size 29799 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d9ed0b7b406a9067ce905b30fa0c0ee1a686ff5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19410aa105d201cdda2e090ce549776ebc22703497ebf07768ad0bfe7f054c73 +size 39835 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..073ce6f6638650d80a73010acd7bf31376aee5f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c04859aecbcecbf44011d8baad5e7665efa03687b67a5db4f0e95a9b5a993d5 +size 26719 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3687ddb4dec738938b109640c48da16a29dda3f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90c49e4ef58cb77ebc477d1413d4e68d2742a6ae9618c3e8e79cb78b2decbb8 +size 61168 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1636acb78af419bb7ff9e3ff111ae2f0c20164b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c55f9a3bee866784731e4089a96db89a789f2761f752ba4fa5d7ef1561397fd0 +size 40338 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44ae06d946ee0f41f25bcd17982062773c32daa3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ddef2f400638d33de3dfe4ffe8fcd17a188bff1773dbbda384f63babe187ef3 +size 23759 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10379df91bcd421ab2edb069cd1ccb361b8ca11d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68e0366d7f2a6f1fa8d1ad0279ba0ec4c76b46f4b7f4bc7756bc2d5c1bbc86b +size 31081 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..466ad96ca6e7fd8831bd4c4c5b9f1e070a9e6820 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d374675408f20301af721c1c14427ca3ae9cc0a9f9ea180048c277e44866e80 +size 22904 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..143552c97e3eb8b317cf0e6c48cd010633c6a76c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b332ec27e3fd19481d53541ea993a5add87242901ee21a0afb493ee9a5bd247 +size 60889 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96e46c0aca31b030ab78404bab937bce92a0532b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048d87e2fc6d275dd4af6709fec6fdb195ba2d798c8fd74f462a324682ad846a +size 25695 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5c8d86f6b6dd98891f1fc1eeaf76160e24b6cdd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01fcd0dcbebc94a1b22d92683fdc59bf9aa5cb1b1ae5dfb19d771ec283bc998b +size 25800 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f15122d64a8d1daf7344c484ab98cb4c526fc549 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864ae3b319864154e43e0c64c94caa4856c19392d1a0784d5363ff35ea0bc4a4 +size 46497 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d3dcb2f742630ce0050feea00b95a8fea86aad9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47822b268d10b8e4541b2e33bd82f9746cde476f2155f783930a3b46f1a7d8af +size 31405 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..740932a2cfafb8b808d33f0fb3c1baf39225febf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3344df1562bd0935600891dbf691be1d9758704f873b295ef5397d89f571276 +size 28768 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c579c96355c1476fea2b140692a0fef120554c8d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4866d46574a1ab136bf2ac28ae43511d85d9d16ec64fa1725848a08ba981ad7a +size 74452 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54910cba08fe364ebf0020901b2307613976e5a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c01d979eda4ac898166d736cfbbe1a30e8d94f36258c37a640e18e19a302da7 +size 30096 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45e332324d4cf8f945c10d0568773a965136e8b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d77a9add97f5f6c4db43d9a79de92071afe1394fbd504a53006e63e369f04aa +size 19092 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1563600fff992563c450deeb649d594c788ce06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f9a2bfa8518f24ed0212604bf3e6add595ee43b9987788a38d97c250de5bf1 +size 87925 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34b117a9a1da8eee2722198a2ed3219d11d18c20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ff919305e9392ba6589df2e6803080863ee623fcc515f0dc5746a2aa8fab685 +size 49880 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d12f9bf789772e888f62fa1dcff28c426fef3e1d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8372de0e636b3744a0ad917f54ea14dad65bc84afc29d34f59a4251cc1ead81d +size 31578 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21acae185589b729c939a0209e7984444368b798 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ecb2cca5c4312c27cac5fe35f3c3852590c52fc45b8c6d87240b5987352c0ea +size 145421 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3329d808df6be5e5760a0732fb9ed61c9e2695dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91f0010af28aefde84b69bd8552352b7a582e2db35e931dcb384f9fad0484444 +size 44574 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbb0eda5ae9b60c31d8cb3bc039aa624123c9932 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea067c4a308786123679f0b5b01645b2d7cca4bb15ebd022e4ad00b078bbc45 +size 54411 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72b1687fd0e58e35d90277066b997db5154374f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d4627275b193f3dcf10b9aae345f9b358ad449eea681c88040fe220dc9bccc +size 92718 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cbf3113cbfb1d05db4fdc5bbc41cab017da0f3d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a43386d60fb5c94b62dc4b44b88002293297c3f246895827d62389575dfe9923 +size 56969 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcc200199d1ca50ab1364217fc6264ccf36a5f63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5bb5a30a9fafe6d856a5b21705473a2fb12862cf7f21ac6a1f1f4972cc7a793 +size 58428 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f185f24f47addd184e184fdc7528b5b41c6792ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ec324db0e2ed9bcb055cc4fad51bf4e71e1c256bfa611c7c02e60f719e0870 +size 42796 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba37ce38e6118b6d3723366b1b3f76d3c5870d32 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9cf91b1fa9edc9281eae1fb602713d4b33a3b151d2c6ddd91534b2a87494422 +size 143937 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26bf0b8f9521d1525215266b36d17f38f002442c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1341dc46b46d50767838010181c4ba0061d6f37bd8b525a08dd6da2611385850 +size 71110 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b96ec68a3cb0096e7e4516bceac23f7e9fd310f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b0ec4bfad46f176c50c5c59a1982558063f68f307fb2bdecc838dc7edb5af12 +size 162004 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b045cc21514af7c17d70f46666e62c2e301f6af9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c604fc7f6b8f5ae2d6a88beafdc32a6a9c8666ffb811ce781d42967b6ad4730 +size 209438 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26e5e778f6e1daf079f2b42bab9e4cf8dacd6394 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cdf91ba434ca38a0cac72e24be8214eda63a04cdf6ac4d2eeb8ee2a085bcef0 +size 49866 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a795b93425acde055af4db69411b1a9e94b3ac8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04235add981ec901134906bfc002ba4a3d8a25b0a712c1239473b41a46d4584c +size 31530 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04fe77883bf61937b2c7b497664ae0c7b72b257f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c174c0e2499ebded7ba685c72c15c8257bb5fa19c0ba2944f5174485b40c8c +size 36052 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9a560e17d4ce4fde5597e736150a3730a303a45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2988ec37a195d8fcf7cc89cb2f04f2f8a5248d3fc689c47dac06ffd0062cb91 +size 29521 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a86d1f986cb337f31639f509f2d44bd759a6da4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8125716c5030b977b344a2f67b901096f9e7f35416e79d4735dbf431d04bdfc +size 40312 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..824a21b35f93b43aa343d4d00cdc494504937612 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a2536a403c132c9067abc7ac55ce9bd42db2e98b13babd7f436b4b2cef4b734 +size 27022 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc3e4359766bb8bd759c0f2d95c914c80d8ae2f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8971c04e24d3ab5a185e1f7d2ad3ffcce1ed0633d9dadaec7df3b2dda3f60e0 +size 21667 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b140941d245e7fa69262454c035153b8646272d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4e574bb905ddf89333443ff0d6691e2d6b51c0c00b51a8b07e7ab56e3f479d3 +size 57433 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae9f3eccb2a488e12751ad409a54005e46374afb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:062b752964b115fc7960160f6758f5b3d56fa19a71e30b307792fce64d748e81 +size 22268 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bf77ca87813aabf4a784dcb168adf54397b7be1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab78d6b75a69af1e9b4bff6ece18faa88651a43cc54b46e76900927d48af0127 +size 169184 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..649a909acb952f94fdffbfeb57e33e82455d9858 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2716aa1fa6240c1e81fe6dac63d6deff9c18c91aab87ca1a3b31bb69310c8f2 +size 90188 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bd3ddeffe6df342dfef8b6cc3960236051b840f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:971fe9152b7a4357a57ceef8d2e87b72a365e17be20da8241322183682581ee9 +size 154167 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..304503b527fae3dc55724d24fedec103fe46f626 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f740f0487ebfa579d8b481fc2b08600cc160f7b62232a9dac38318e7dee0116 +size 79336 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e530070911fc29fafa018c3f6dd7c6ff43dca7ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9347660c5bac1c9ff297fcf6d38c62cadd1a585b58e153d2bb96c47136327077 +size 74277 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c781a2099bdd6eb468a681fec7352d11936b81c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2744863c99ccd0c0c2bf09a0ac668fbc8d90b86f0806c0bcc2e1fe5180859c5f +size 82512 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee6630a5ef135e9ea9c49c904d6df305b9b1f9e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4fa83e0ab8f35becb9f8dea72a49be03339637b39f1cdddc40398812c416e3 +size 89878 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3e76b38ad893481fe393a578f834962647b5fd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36fa83963fdbbf296ca107325387315158ac1f9606160ded6831e93c0ae0c769 +size 1004326 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..108d6f8b3e399635f043419a826e664154f4df9b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b259ad5b0002456e85b887010b8a9a775ec98298a20f9badd6deb7b78e1fc2b +size 132998 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8744af0bb41ac0c0daed9d9b32d508e337270ded --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76e2250d4464eba4de643f08ebe345e9a6bba9a17d7816ce6841f34d04e5f77 +size 182928 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4a5133b529c1b6a9d32ceb88068b06d6273074a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b20f264ee7b818728c9e9d66953eb243c2401f0bd42ff13bd67a7171ba365f6 +size 27552 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99e8c7ff743a2eefd2e29ae38f358a0aac525ad5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6ab6f3e80f6775e9aea785b722941cf07e01b8143eaa840b1285673e0d5c0e +size 120451 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a0027285a836bb508b610183755602d92296fce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc8feeb210b706fbefc55f96f9b2aaebb096a637fd1d037c40a05d87b91e60f +size 57982 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b556214264061ad60582ad3db9453953ab783a0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d17510ffb751c23fddf34f192818f0ce4546ba363aab02e6cddd70b66bf08b4 +size 25426 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cc779b4ba2d4dee5c2c91c79b6f847ed7c93717 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c3fe6cd2cbaac99f170581df0118e0d7207456435728a8b9805abb5c509390 +size 39416 diff --git a/eval-results/mmlu/0/ckpt_177/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_177/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16e0b03dadc124fe62009acc65c5f51259fd6d70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ba83d9ff0770bc0d739d6072b8e8d9b64a076e3aaf3d64a932f25067061d82 +size 32939 diff --git a/eval-results/mmlu/0/ckpt_177/results.json.tar.gz b/eval-results/mmlu/0/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19b9eb81dbdf86ea92c86beb47f4974eab091c4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb11f84ae75253113d0e8886281f9d26a49ba283bebac3fad3f949799cb294e2 +size 7620 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5b45e78eb9cee8d0755f3d3ba6b356fcdc8d9ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669d8f5959a457b91a5135218b91e8862dd655db988eed69e63b3d63c543e999 +size 17019 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9bed11b962bfd489ead6e583643761f3e3703f59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae82ca767082d1fbf524c013f95ae203cb4c71f302058587f57ae288efd29008 +size 29825 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..248a44b463a063736fb450507c7a2247d6168dc3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ad21424964aab6b51c2aba237fd9b026c0c461d98c10277cbd593a9031c6f3 +size 39818 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a70c22e59bd2c1e0a1f389bc851d066d94c8c315 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f87d49141aa37570720c29ea08b48a81a5a6a67be50970a1042ba2550c512f5 +size 26764 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac9bd18f0a4d357e4e9838cfa7ad800cd35fe1ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dafd5bcd0411e36d6c31377cd50866f6aee02345e1484d036a8665b28cf41d +size 61284 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4406b6d8ecb2f38066b4c86dabd05478efcdd09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:671c0b2e69d2debc8af5a63d48ac4a3d5deae649748f5c0628079fc93cff7394 +size 40391 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..285d64234454442c424e5b34dce940f149e57eac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a237092192ea53f92e8fd069b4cf2f767f6e2f25a8cc25e4bc99cc5c11799aba +size 23794 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdef57891da3a5e0897934ce2c484274b231280b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2080b0f2e51d7bd691cbf4ad9492c570a4353edb9597098fc353cda8c098d27 +size 31082 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59464a9b0faa239662f1993feb4b047993ad8481 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a8a966a3478e8b23b4def56d02d6efbe0664a6049ea45a6cf0730e9a86f35c +size 22888 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a4deacd123cdd15139bf49d33ce3cf169e801cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc7789849f8ec414fa3286bd767249bd5b05c5f2a5c1513a86f8236de249a87a +size 60935 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1490500f5e96446cb3ed01de346317ba114c37cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038155c8c50954a1633b47dbc47dcb0157172e49d19166382f5fbd0df3885067 +size 25707 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c193c4459306f1e4a4dd7260aa4555f5115254ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9ec622c1734857bd1a1af2b2315c839c0d8d23f6440fdea384c4f9788a50ce +size 25787 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e604bd353cad43538f46bc74224aa1852e1543a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0efe6d01e4f80f9a86011f408101665cd742b0a54b68ef9e15aef5e3ed3e8002 +size 46488 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a83980f5481c7d14087e2b9ae04ab6c13e2f2ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dce22d8cb1487f54d42cbb299bd03422da251c804fbb498533ebd0eb513337a +size 31446 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e7579218124710667b7b3f5d12c123745b490f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406f602c06d7d3991b2f7d4e8ca44d7bcabef3b8fc085fca839d8401430d3420 +size 28771 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b593ced5a5693a82bfde93cf9b294c7bfe53cf03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6334429500b9997f33d01c5f72a7de9fe206205f632248f09f7c946b9acd62f0 +size 74479 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fa3fadf40db3cfe8f123f76ce1182c51b352924 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f49ebe85ce7c8395e842e22b62b78145ab0bdaaf7bce5ca4f21f3e1a631e87 +size 30083 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00201a0ec49d4456fe1b206041c2642fa0bf08c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b234c3679a7f04d758e23608adab53af7688a14c399821e7aa173eca537aed34 +size 19105 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5173263249725d80a7e5f77ee61eb8849343593 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09cf481db7fd72619816bfc89c177ad0a2da4c6c86b8e3d20c8afadef9d2d865 +size 87991 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16436f9b9a9eb7af7915196093ebbcb42053e216 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:145645f4b0e014dab7bf06da3f8684f7d4275ccdf9db2680fa6e6bf415a4f32f +size 49988 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e750193c2a0eff8aedb11731979bed19ce74a7e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:526d36b1f17fbf69a667f3f5e1cee071a52b2bf2bf02580c3dee3396e7d0f55e +size 31611 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..756e95ce8ddaf315cbe529c5e256dac65843bae0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0340a7b25ba0879f9a840cfd6acf0a614932cdef28aa122f12fadc47db1582 +size 145518 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a5d2232ad5091fbe73d67bb8654786f4ff91bf5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcfdc4c5c4b7d103c8d3a8c04b1233acd2976b3f963e0cdc578d74da28199d49 +size 44624 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba23e40a2ee663c198a7336e05f8fffab7eaa23b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9338216137f640866280fdb33f5c49fcf5d0471631509304242150d9250dd93e +size 54404 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2461fe8684482aa68028f3810274aa006cf58b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4675a27706ec0025deb808d3c079268028f4dee13af2d33824bdf9a5ce8836fe +size 92728 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75fa0f2555bb31301c6f1ab4b3892a4580f7d642 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38be315c87f8a078e941fa04f486833c247f8c83032bc86ffae78a630c4a83af +size 56948 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..015bcb9f10bbf7cfd7f6e5a56e3bccce9a812df5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafc97da02b0b1e4837c3d6dd71559fd1b33749687701b9b18c916bf221b20c1 +size 58442 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61fae28428a9909ceaf6fc339abf48a4c799e7a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2be1cc305df21c2a8979f2dcbcba9833fe39049c22edbb6730cab065322201 +size 42815 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28721aca235486ed18622b61c76aa58dcdae2b72 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05180478700d9f710009bb8f506fc836b38b13601da5165244515fce61ca36d6 +size 143892 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92317ba4f6fb28d95615a72e98f3c1efb03be5be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b6e2f3f7a5e1bd3bab4b9e1a908d69d9f98944051187946683fe782a0773011 +size 71075 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06a6d1285e6dd2d549d62b1fda75227c18f62c95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef99cf60551c0fa52f8ac8f7fb42bd4cc6150dbea85b218f407f72b3870f1d8b +size 162072 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cfc6db75dedbb2b92020f2477eeb75fbd586c2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7edfe72a35189311d17dbefd879bbcea6939ead72f451435444bdfe9034d0a1 +size 209515 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02a9482adf9efe61ac55eb5f17ce2bdb619ce8a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a285e8d4b6be49b9e400f64c51d439b13332506e1d8709a06e288ce91324f9c4 +size 49920 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a725dfc2489a6da33ecec0545c71633d0490de5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bb2aa417b9871c0fab93ffda9e695a718256eb8e59b404b8882fdfd822f1963 +size 31517 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb2260ed8f1b5f7a8adc7c1108b2070b41ae5f79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6e3d3cad74e26f0dbe3389e04ac18d7a922a74fbe95c7db5d37abdb4ddab53 +size 36051 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4d2ef99a7dcdccf4f14a753c03dfcbe6558e731 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44accce380a5cd1e1004fcd9dacd02607536db5d69a21890cce0a4963636a73b +size 29478 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11113841bb6967f375700f0af1a787a4a0c07453 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62afd098beff744803fd19ff0c81772d9fd29e7b27ed2a5ab7027e0afc9aa3c5 +size 40366 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..280649a15b25f31947c255f4ce5c029b1fe48e4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a08c9d0d2562006ffc040cd6bc248a9296838b46b56eb947ceafdf77f6b1638 +size 26979 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2172a5553f7c0651ef5cc490d79fbf1b078cf71e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b9d8601a7c62196becdfd7ab1b74e88c28d2d3cd0ce24a451b74860a04a428 +size 21678 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e7b420219ec0353881dfea7bfcb8efff00e8809 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f228f17a5b8e1f15c347b814eac6161895ab8936febd27e9395bd889c65e2e7 +size 57427 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc907ddb8b756b0eebe5d3fc20f9a0e61ba3557a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e831f7376593733b7a8be438590f6d5aa812b1f00968d8987876919bd49acc74 +size 22287 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9f8b66ff1ab2c27997bd338abfe6d72a3ab214c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1029d042fd7ce605932b347f6a617394ecc3dfec8350d19746cd42a5dadf27ea +size 169238 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2294d77202fe942be4944f00515264a5be31f25f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52855e602749563caa8ee2a77c20188d63145e4805066881f6409a841c6aaa71 +size 90182 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c53e4f321d7eefbb428db9dde0d28f8b7f4592e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c307a9febeaec82a86c0c6c8f1ee8d807fdae13b9a93d8347e425236a319090 +size 153565 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f5034fc406dae3ec4c6aa107a19e083eb622b62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e34d54ba3b797e1f7bffd316da5bdd990a5c5b3b42b6bdd9bb2f9d553420fb +size 79397 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec7ca9b962c5620569e5368f9d7e412761a1f92d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d66276bf556e1f62ff4bfdb5296fb38bd6acf23147da1a06e8e5267e8f5cb0 +size 74315 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81257c69057afb2334ead37e2293dfdd026da5f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01070aa89c35f96b9cc0d04f6e854f37eeef237fbd91e335350d714201c88df2 +size 82578 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79afbb40defef6c050afe30fc4c425328a114805 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b80c7e58c6b59c016c8f7eee4988e70e4d5189c53bcf4f8fdca6576ae299a92 +size 89808 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96386c5bfccf12a7f8c822070b85f413fa1102bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3c964b6c565d053da31983a6442fd79cac59e5c8958c1682ab43bcad0eebf4b +size 1003872 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0426d1151c0f828d4a590d24361d1de275a58a08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac819e94d2fd54ab15510736ac010885b0c9c9c13b1f73ddf718cc072055380 +size 132881 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dda7c495b5506cb24fcf00b062ce36a9e3b31c3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:823856d35b1d476cc913013aa234ea99c8a82ce21e0e03ee4623621cd14fa8ae +size 183024 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd6899a88437f9b9382dbc6df7dae742acd7eae8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4c5e831706805550a99140b7edc09d3338efe77f5b9d7e6603315e2bfa8c2e +size 27572 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fac9e39550dafd3fe5ee3a19d54be51bc863aa4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0d8a66827e172840e287d5159b920d1b3aefb0d7f9e9a5401319aadf7611c5 +size 120282 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d86b187143a9618e44f806a3032223d0d31b3bfe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8a6523e92bfd6f1c3d804ceb81585a42ebe5d1f56e0f1128aa26c83d29fb22 +size 58038 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98a3147ac9433b33cf25131339d501d2143809d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46a311c029526c37cdc7452e490b7d14105ddd29612e8723f247918708fdde35 +size 25456 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc62644ed578d6d19eee9afbaa2465bc15353187 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68d3debaa9f4a10f06c43835645dff17c75c4ef12e9de15f655322f56b90526 +size 39442 diff --git a/eval-results/mmlu/0/ckpt_180/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_180/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92896ed81ee7edc84f8ef0db6df2e8540db5e080 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb5c23ca7188312c30a49379570b1be95ad17eae1bd6b8403038fc0b174cce89 +size 32948 diff --git a/eval-results/mmlu/0/ckpt_180/results.json.tar.gz b/eval-results/mmlu/0/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa52cabd44b706db58e28ba71940981c1b58fb50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc87bfa0b98348299e3f462e7581b7473c2020f8ed8a9f1ad544238867087419 +size 7645 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4316613c86052c272418eac740f899f41da4ab7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4669f223e30b2dace59ed79d7928fb180b6f0e982c2b3677aad521f60c97a3a +size 17024 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89e9c9351686018bbded30e1dff1e86bccb840e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:009d979d11e29883be0ae1db8e89400773031e84b07598d22cc22d3803485a08 +size 29831 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fe0a03e34c724cc48364e11601752da9d67a695 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef25da5e35ac6427c7f84a21e2ba4cf3bc53ad61cb8bc341675e423c0e539364 +size 39828 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..081a6eafb711180f05ece62a83d696fec78788ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df20cc19ba561e8e09532168b896e1e5804302ac4eee9b921f327130ff2c8d9 +size 26743 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2f0905afd09cb6ace85c0e019b382f25947b191 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02888f7539f5e4984f75a3c623db304295a13e11a49baeebac58e1029153f0e +size 61161 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fabfba836b90b701ad897d4fe0f9df0eecb9e1f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96539ac7f708e050c9542e24637b097e8497dfc302da3a597ed8db1f8f795eb +size 40358 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f504bc5f7da8bdeae94e147942c2d4a7ffae65aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa89723e11125bdf50d1f4b5e521b62416150065803f168eb994f4a45260faa +size 23772 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1ef53ec80ad270f4c6470f21dcd43b272dcfad1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b57b4d92b17d8afb8025c4097af30f849e8a8057ad7ca8524e48d0f41ff5ac +size 31091 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47b3a00aace5463843425db80edf21e92bad2acd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ea2f97d2efe3ae1e425792d99dda2b9cca0a83017916fea65f271add531d0f +size 22915 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11e488e49b943226270eb7f7b89d57e6f46a060d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd794bdf836a39df51cf965583188907488d93f4403fc7cf5521501fb594223 +size 60915 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06c1707207795ab7886dbb9fa54a58add16c8b27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e69996304021de04fd4b69a088e1053575206f5e24c5b1811c8c076f7ddbbb3 +size 25688 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..378aed337858120793785dca8bddb789b1fb874c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c80f4830a534bc5f2f0955b136349d71c33c3640e6de40babeac9348037dc23e +size 25769 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..663b1e7cb2cf48694126a4522fc73069f3a96f35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98c01b94d0c544b80e86d9907b5a685c1c7cab1936e8b109fa52f7261755b37 +size 46481 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a1faf5ad6a8528d14de459d049b49aed22e4720 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad9a8f2940c2862bba3dec0d098f417f8ed782ffb36da4f3db108b010113482 +size 31516 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..890a2d90109a9fea6aa9ce92b47427b5aa388d59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7949bffbac3a6883cc19ce76beb915c67734cd400640142be9458c591bddd13b +size 28816 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7541e2a43dd0646d68cf4944d84fa09b41ce71fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333c1bfd73b7204ecc836395b85df67bac4ac1abb2e892383f7566dd4d32b70a +size 74432 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64d8afa114ef51f2d77ca866fce3cbbd58231d9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd18c8d11f0d3b15677969478055a6606ef7d0c5336867d698821031036bbc1 +size 30036 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..019b1b3fbab66e4b517ecf5f36da3ca19f1e5059 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a99297026784cac54ec40078f8a2682d36764f6fa7d16415a66ac7fc4a7cda7 +size 19091 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b74e213297da8bef77eeb3b78e1c76fcaa4d93d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e97621f267da41f6bb2812496b5e159f36b063c2c5506954a37c26ce70d95d +size 87891 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79b58263fd4fba8acd237e927acbde6f73e3f4b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:922e7b53d1f4ff5d6ce19317e45ad7e169901d778115fdf3dc7566e7aa9f189d +size 49973 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e66b4ecf3a393e90796712b336b88d1f05a34ffd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003ac952bcf00c3df81dc9d0e193e19e39b5e8ef8a1f253eb749adb654483b4f +size 31561 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a9a716c250eae89f39abbe49b2da9153de53ee8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494fe46154c4ff77ca88ecc6d14ae1618f5295882bbd7dde7b5bd597183c6e04 +size 145341 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..416212b758eaceee54c0b35bc26cf151a16f0f50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090b9ebf67752fe1b4db47dc16bae6feb9d35886ab0f3c44c2515fc0686c73cb +size 44534 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1335dd67cc9cb44d21465f463918dafb49bb647 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32b0708058a8c572bbc28bba2cb68dad64d787d9522fc4824c39fdb12dda477 +size 54348 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..586c8e505fd9a92b41811e60e09eb46fea5a82c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781f0de0ec6a166e2b7600c6f0ad1ed5e75a8f6082d06f21db0387eb40bdda0d +size 92645 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afbe5d67d91a45cbc624f971c5d6437b6d983693 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c41c6b2da93a3d3fd7a65e6595280e1ad0872b7cc09c061490efbb786118e8f +size 57072 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8e2acc1704ade374d2f1e6aeca1b77059cde660 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c36614b408910cdbaec95ddd3c6cd2b8dc8870c02c1e8d0bc81f42a147225fa +size 58415 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fbbce83f0f21c6f08b45c0e49a2c73d82f644b2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b91cf44bfab54bd4edf55a7dbb4798906a6394bd605743a1c439d21331948b6 +size 42840 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7995058ad48811d253138cd867ae1e042e515e86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba673a87ecfba862e37277cc7f50fae93d64b15a98604df517d9f55295263f0 +size 143818 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da7f40bf11556fece678b23de87fb8f57c3f26a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb3c749f109e08eb57c94f6b2f82b6df989010efd947b8c2deaf997105b345e +size 71104 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..317b7d5cdf03a8fedabcb819b6143009991f2391 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d056d5aee6ad3707520a8e24d9ff80358706096fb450c319066f47909656a34 +size 161935 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64f078aa508d9b6c8acc1b4cc1baebb7373d3d4f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934b0b0a40b66f4b28c992c5fd50abf93f900fc6d9f2a330db04fec9c1257dd0 +size 209359 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7f15601f5018311a86e81aed801be7968d8b0ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce65a0eca126a513f9899a998bced39e3a0e5f32f09a1d528644a28c027f21c6 +size 49842 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c47b3aab88e54c6a39a081153f7b52eb195c02ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06151be327ce27b3f527e95f7ae16ad6bfc08702e89786a9490dab6213467b14 +size 31514 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2393cc9ee2a98f08473ce158ce75fe4858538db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ccaf7edd3ceac9497fb74f35a073efbc7f8f16079e30ce0c11f0f8548d329b9 +size 36034 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1bc518e9400c454f17f6849998cfc075e98497a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3e876ce3b05e24dd6cfb55b15c988867b2720c0689b72412d40cfbdb580ba4 +size 29467 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3f843b68155dd1627c979610837a308b71c6a4f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e459a1fadab280e19ae6b68d403a8086aa051a62e68285d35dad98dc7d658f3 +size 40258 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dacfec97cf81b46481edc754fc92d79c54b588d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb18d6466a94fdf3447f2ce7a02a570dcdc36136477135a74f858f67c71646d +size 27026 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cefa77ce90652e67ec4acdb0e758a0d6769f005b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:715045357cc5e5145d8d85dbd0bbbfb83a15128f7dc428c4f1e924fcdfdb7974 +size 21687 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..611f2d59c5c22cbe184a2875c3a0a3423ae81489 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cb256c6985c5afe0247ca7c229ffdccd408c93c888f0cf4c769c0bbb7f184bf +size 57399 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f6eb0f5a2b0c0466f40f49339528ce8a831c563 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0844e4bba856fcbc4d7f23484cf22ff7585b6e2a351821a7ccd6537751bd1e41 +size 22277 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8e81b9da7d1854727b9882728f238b6c2f74345 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35865e9bed28fa32ba7da1dd2713126d8560cde560a1922904dc29940e2a1f37 +size 169023 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8431f137f3a5f02bbea7906c6f583b5f4dcab4f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67662021d217f8061af5372eee5be43b09f77968768de24e267170a1088ef573 +size 90180 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ffec231e075b8f8f911983988d115b4fc2f6300 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82dad72648ccf88f619b228ccfd0fbcdfc27e48af4acb1851d5b14e57bd24693 +size 153704 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe70e6ae55be1759b82e26500f2953d66349bc35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8807fb31b6e220ee309b18eea032fd18cb0bf6f9abc213c1e9ddf34713e95035 +size 79363 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8111b5d47cda7a8558fce4c08a91690644232b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcfb03274b13aee9c771c76613c3c9c4bd52f0dbfd2813725b14db676dd039cc +size 74188 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64d9fcbbb83bc509085a5b8c7e2ccfd959fd6d97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f71d6f0ed7f4cbc83128404d408bb75e81ff62c2ebf45f3a20c83e56becfbc4d +size 82467 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..514a77b965e20bdebc1001d97613c91f334102f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52cee6986a62be20b192f4390803e3e100455cdf6dea76e790f8435a49a47b6e +size 89837 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d92b97376d03de7f7941509073c32b0225de724b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bea10440434b5637dbf5f3e34377e7c26ab2697b8c79be13a98186941cf43a2 +size 1003977 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cb076d5d3e5629212fdbe9ad26f59ca6498a4fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d7d42660c41df4ab5499eeb13a140afeafca63ebd845c376ef108a6a92eacc +size 132914 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4deae35192758f89a730487cddc63612bc76fab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47588f3d224712c7197205fde90b6fa3721d71a420ee1d7f323cc7bb2ed5f0a +size 182922 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb25a4342a08728b55ff3e86a65b878525a7c31c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5226867f0cd5c5a9890ea43e64406ab1cfb1237a59edbc2c3b931c32f557eee6 +size 27510 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be6c73c2e5490d7c5840c32605de2008ce812208 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1188a6045120992c2b84355968355df6143526ceaa99e096f806f2ffaaca51a +size 120398 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a4d4dce83d969c4e367486265bfe96b70b8b592 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f6b869ae3216adde2ac2f41ad6070affb1a2b57c4ab387d9febc7382d63e76 +size 57947 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..296184d43b061fc24c461d6f329cf3f80ca8d8dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3d3783d5a31d5d05dcc6e1d9ea7120382e2a3f7da92e421407f19a76a207ea +size 25425 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17a8ccc7c28b7eb038c6663c85120ed00ea3413f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be9eba0fa4904d28ff138b1077c016400854a84270213f764014c8a455e4b40 +size 39419 diff --git a/eval-results/mmlu/0/ckpt_183/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_183/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ad4a48732cdaa4daa0d933a545d116d779593f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4492972a0a3945eeaec75459d52820cac8c194895f4cc58741d6cc778e1a1fe3 +size 32936 diff --git a/eval-results/mmlu/0/ckpt_183/results.json.tar.gz b/eval-results/mmlu/0/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a354bec66f6629893536870d9332518986f475f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d8162d6893debb069514d13dbd0f4fe72c9464862b1cc83147479e714490b5 +size 7634 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb985633681000ba3e30f13f4bf918db63874e45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9507f27ac60ffc3fad7d6c749e8bf7c78cca7fa9f080e5419d20b852e6f030e1 +size 17014 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ed900dd4683132a5cf79f2e65213d6acbb1e675 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:971458c6b8610f0c03d982b9e370230d38c139f51933a7cbf3c086966ca24e5e +size 29769 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8987b4e9820cd9b0509ce0a3731f850ba45b9179 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4befe4c2188bd0fd4d5ce3e8626739361df709e9d4899f33cc9648ceaf9a1e +size 39815 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77957308a25864fc712ae4e6034bee9bf1aa7a55 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:587dd380ce71f93138361dd8711f9ae2bd52109a433247307f0bb774ec3c6319 +size 26723 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..057c0e4eaef7574938c42049deb3446551321c0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b40c7ca7459f6e30d342a743dac28067bf0bc2995465cc9366f494f267c75a1 +size 61189 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fe3c3d06bc66434c65ff12e080603b853b48e83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f14b7cbdda719076cbd91028dc17e4dcb99158ff7a7a2fa99d2f2114d906f1f +size 40399 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74be39affe0776b0aaa0dd46db6b0f2a14bcec5a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0186054a86e6801a4927911b1a0de561e1956c4a20b5955773fe013638c7df83 +size 23790 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c05366f32b92e5f91479839224c258b09b0b29f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ef7c1d1193d12be509e8853f7a2503d47f21d1c38b2ac8a82dd957d5a4d26f5 +size 31095 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..883b6958934c22890c5f668b4a24849b8a5068aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b933898f6ba92bc76a723bc5b5f8cd2a16fa331fa9584f6f24d6c16193f7dfa +size 22942 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41a8c21ae2888f6e3c4eb329a9302505c8d84451 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bb3172e359ef32e01ae5a97a420b4e6b51c5133e6c790667a17ca7a2fc49275 +size 60827 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64665153f948027b2624440f5c1de94fe6bae770 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92cc914ca4500b1b31ffc28630807c55a4f12f1c2ea1e79026cc7b12d01bc188 +size 25730 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d080093ea7ebc22a2c48b7478250d46b641ab6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b074c730d6a6d7913ee83c5aa9b86605013e78a2b3553a89a39b0b6a139cc73 +size 25793 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af9e02987236d8350be029df6cfeb1ca5b70c816 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66be19b34e3c156632ade3fde752f76e81f69deffc81fd3875b1b22813b5a428 +size 46523 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7751117f496c0c7eaa3b51966ba982259c856350 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca56fab0480c7c5d9729d06fd0b115d3e44656572848b13f0d6355f51e4d2895 +size 31486 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c4f5e345d564ed656a3452f7a85ccaf7c70212a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11aeef0b30b998bbbbe44391237d4772e5166cc7cfb096a035a5d97f13b04e72 +size 28770 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2de30dd3cd088ac2c6d3c24062bd3f21ed79127a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b057d42d7d1d526aacea93573c8be03b89bdd0ea167c70e292b405815d6183 +size 74377 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efea66c9fbb64e4d60b2ec5420371f8419b213a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09722ad71b50829cb2c7d7997634fd481e2b27496fd8599341aa8ee125ca1628 +size 30084 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fb9381b7e5970e2bfd4e370ca6c6cfd87e933fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4feedea76e85215d35b7ce75e8801b2d0631a7524840166c635c281683c664eb +size 19133 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e05413621b7bc833e57464410246de403f60730a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ffafe146b39356a9d97532421f036f7917296cc3ae2327f89da2c1bee2c249 +size 87956 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b06c8ec1190c1643b643d84b1e5cfbc8de7f8409 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4149a1fbae11610f0129e94d0b958ca707ef7b271b02e5664797293cdedb3d3d +size 49969 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6796707a483b2d55404fe48c76fad37b46d26fc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eba382f80256b004b9f56fec2f6dcb78f14c7833c8868f169600d5f5b3a74d6 +size 31590 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d541b3966d1aa3fe6cd619ab9405162e35be3b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9641aaa49b7a07f256ac860fc622adc39ea9a377f7cad29bc6b3de58425e2e9f +size 145359 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e786c722f449280caaf6dab8391c0a831b6fd07d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84872731d1ec7d47e95d80b193c2b51f60656b6ddd5a3c7fc7f5833c7ffbdac7 +size 44521 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a9d705382520cfdb27b0b0625e4eeb2e6699a0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e081a78c07127cb3b4fcbfc636a2401f1860efe23420ab7c503cb3eb7328c776 +size 54404 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de19ba628266536485e233c3abcca4a62da379e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32177f3212edeb0d9b2f8f22b72b201f9bf58eea28ac06a692dd48a605028de3 +size 92702 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b0cecce87bb7ddc8761660bdb7d01185d72ddae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adff8cc4eb1ddb60357518a74473c6c87933321c42dddaf4dbe31c507d9b58a4 +size 57017 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f1d4207f8324578e18ed81b234af6097e590205 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d43c55819aefc1510a92f14f5f8efd04838057222ec03cb13be22585d4323e3 +size 58415 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53ab3e7273a825cba4afec6766541f6ee8a424e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb508ea53b973bbb664bf257e931eb253983abd602f0c2b5e4b78be826c01bb4 +size 42847 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae7c61ce63a4f6c1db9301327a1562c05bb01dcd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6baee9941b27329246d01939cce8d12ac73f2238fe8bfd330af98983083ad3e3 +size 143860 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8afbfae16dea4e7364ebcd6e709f129ad3bf656f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:073254171065df455241805e670cf962ca0b8fb89b9292155702b2068f4e2278 +size 71080 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0be3bce0ed0f40181649d04178ce4dfec5fffa0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9545310b92cf2624bfcadaeef5243c1e9c17fa4ee585b61099cdb355a5abc2 +size 161910 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2180dd39490904511c24bbd3420acfa7d8ce6801 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0653e92d3488b84749780de8b6bc0618f87bd02105707087a834a0c7e9f25bb +size 209313 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddb0308984eb88898fbba780771d3fd7f0112289 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7fa94ab41cc60f8b954ec746b55d588262e272aceb430defa4d16183a2afc43 +size 49834 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe46d3e0ed136a6e7fe0c07c547b0c3f78731679 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1c6342fbfdab540271fde990f05b4ec5ec43ba1060916c2f4724457fe0e386 +size 31506 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51a32af106467d5e4924b4124f766b3241344a89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d2468a7e7aa30db2d2774569ba0b3d51fb6fc65917c4289683300af30a2826 +size 36042 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ade3b6280a050c42808dac4457f8949175ebdc38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9069508fd113742bea21d8f31b2da356ed99301277c2a88498b165f8248713b5 +size 29490 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ada8cd33fce6e28c27c682d8706dbbc0b1dd865 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:535e32ddb4c7b0b8ec1a66472d0fc3a69b2850c6f453863fab2d161eb9297735 +size 40284 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99db90d9051a992f6f04ca32731684c36d827be5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2644290a81b92dc024865a21eb177f0a62cb018ed8b60d135ed2787a54fb1d0 +size 27001 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47c9bf3f65de63b48d662d2309f213a5a1fad961 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf02bc5797eac52e167863b6c338d882cd02bdc88ad58e7ad810e6bd6577378 +size 21723 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f94a924fccf3496d458cc520f2b262b1be81e2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8edc6abcee06065befd4afe593b5e8c84f10591de22fa37e043b1b3674b3ef4 +size 57372 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3da10147f26d71957562f5dd251ee77bf1fb129e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58768cb4aebac61a0e8d5837099433a11321200812d33c55573ff6ac4e01840e +size 22271 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4ca3a4a3764fe110c01a65a73f30ab57955fcc3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4feb3fa337a6b6c58ddddaecf09d90b24182245883dba84603712c9707239e99 +size 169270 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ec999e2ac1a52f1e5aaa16a76c4cf8696bfe127 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b76aa114119492bf5c52c35a777e3efe099fa55d7b5f700c9d5661e1f0e28ec9 +size 90125 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e76f061df7a8fbf73c6bd382e5d081533536361 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b888af82cca1cad65c766438ea8fd2b9f6af7617d885921e6e7f94d7b5191ed8 +size 153618 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebb10d187a1e3d13b85b5ee6cca3ba3f37660b24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca29763dea36d2225f6cce567a76d1d186bd40eba18466979fafc89b1a215c6b +size 79393 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..215c9b4cb56598c1d7d32e96c7276c9e7a43faf1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5742a7c7868b1efa0545d47bcc56075a68a02e60f4b799ea63fefc4c4d2fe5e +size 74269 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0fcec92b1216448765be7d1a1c278659bf65802 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af4dbbd4576f206b2965eaa4d78a56b7bc92ce3cac9e869d541f92613f425aeb +size 82559 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd15f012ebe4346379f0a1688fb7302ae5b0abf4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc4a64c7338197be206167a619c37ac2f51fe5e4d5f8e3602305a5ef8b9d71b6 +size 89865 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..972e23ba1d167346e1a59371a76307f6a555c1e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a075ebe87d3a558b09a5ac9cd19f3795684faeaeb0a4064cfa3c0111fca84dc6 +size 1003677 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4400d5e02aa760539dc6968ba98cefa80f432d15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24150fecef2a8cc59b131195132dc31b60990b5e2bc48ff43b93451c98d92eba +size 132881 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d4637ca0911cc9adfe284f36878750b090220f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe6b26a968ac473d4c9e0bf33d6258f7c0c22901fd2cc31046a97647431a62c5 +size 182914 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3bce5a892ecd0559d570cf78f7366f56b57216b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef7ca543baae408db254101779d98c26f863f6b90c52b344518226c837abb58b +size 27523 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e45175f76225b54f714bd9fa043be1e5dd955ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f012bcdd36eb319db49b4f7a030db142550292835a256abb7658f7d9f6836d0 +size 120334 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a3680307ea2d1b095d7a493c2124e16a7e106fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:364c0a09ce83a4998f2277d54fe6b573ff10216060bbb665bbad70a7f47aabd2 +size 58011 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..891d64610cc55ad2251b39bf03732e77f47e12e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42c91cc8130a140dd1a44f2e7f96712a030d9fc1a0bebcf5f41c0ffe4421eadb +size 25425 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15507da1efa22133bf2645e64bf159aeeff4980f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05eadbca61350938581dce8c50d6938094222c1aee364e521355582160c0e29c +size 39478 diff --git a/eval-results/mmlu/0/ckpt_186/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_186/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc294c4c37c75eeb49e2fdf9c96c9f887b2c7151 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:075d91d5ac366868d08d6826a610838f96cba0a91419bd0059b96bd8232502e0 +size 32919 diff --git a/eval-results/mmlu/0/ckpt_186/results.json.tar.gz b/eval-results/mmlu/0/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abc9f4c258fdd13e745f3202a3955e0425d11812 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d7abd4887097149f09e6b6beac33ef3e3a75a6a342ccbbfe72592a308066f2 +size 7578 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adb71f3747fa0d7b5fa3b9d6145c6b07310cde10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d1bf7ea698e108055dccc688086b464329d98c916c8ff6e18684b3613cd7535 +size 16968 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b086a4c13f9d767a11fd80b355f7a1b05ecf11f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9474a50278b511d3d68079ad1e600175d2b009e471abadc7dafe3ef88885dda +size 29759 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be15c627e1fda86a952f17bddd2fa269649f6123 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fb92f3b91cfe245af346882ac986011eecabdf5f01b61b992143d03104a4100 +size 39791 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f949a4340b5610b50bd5cac0b04c5a228661036 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8266c362618188a81451abc9408d195e97e680353359281efa157a43729373ed +size 26734 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fb57e3282e116719d1180e54fe0626c96ead966 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e323fa4cb5c2ff1e60720caad9ccbd5954984b7cfeba3e7f9aeeefd90443d45e +size 61163 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37f26cc46c5a62708400041ac17dbae68d17103a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0278048b3636855119465672a5be5946273e6c4f5210dddba0cd287d32116073 +size 40329 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7feb3255f633d1d9d5bcf9da9831bc8f76e235a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065fc6cea46aff6cb6db5f2a540aab761dc3fc393459a99c994fff315b1893c4 +size 23751 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffcf60a823f397cfd160526d7084ee01fa4ce4d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee78f708348d4abbc1a50f9652d1b18cd239ecc3bb66b96db3758319667a667d +size 31068 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae80e07af0b0870f39f23fe22b2721cf6d0d0aed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5ff7906e502f997a29891ab353b44d537f06244a91337d18becedbcd4b91fa +size 22879 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f7b5bcadbf5e00d48f8889dff7dfe72615d4cb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f769342a7713bd6c2d2f4d6266f7f2838de1b72ec4d838169c12debb1c3ef7 +size 60822 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d4afb2f4e1d1ad2effb3b61222ce9817568c2d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14cef7c3860c2a70dc54f8356d81d5658e3e5dcc6249c1d57d4c0abdab5abaa5 +size 25665 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94c76c885be6fb7b3a76acfe25c01464ed4f200d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ec8a76b3ce23072e519ba03341d4464258bd965c2b3277d2d3394b8cb62008 +size 25781 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..722ab3e0bdc933c065c7ee1e14ed26fc1dacff80 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f42bb1c3d351260e0ad634582f968fe48c6bd20ee3087766a524e59193a6397d +size 46448 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6ba446532f519ddcd393cd06d245b8ca2dc6ec7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa6b51a250c75687fc4b1a150de7dbfc9c7ae5839f39bf7b9fa3d2fd9bbedfd +size 31504 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..403f8db636876631a59f7c4696d7e8a35ff83b8b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5de4f236bc119d1890b0db225dab0794c58a5c9fa501893d58c2309ac2d2c9f +size 28694 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce3e37ae14537298c78ae04e79ebc3c319cbe56c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1bde846d2c378e0ca759023acfc6a1311dae850f0e0fa5dcd62b40c1d7c09a7 +size 74435 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68f77ffd0d17d30b4c1cb5a062acd3115a320115 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0586ef7a1c86f2ba91df2eb279c12a29d19477147b4c8543867b6fbe0835e4a +size 30093 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a576751efa4acc85cf5d6c03be8af27758c1679 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1156e0ecc91ae67b71393af165c4f246d8860f13a974bbb6fd7b9aa9d1450353 +size 19092 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfb6ace94f5807bdaa000f37124384dbf5163879 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1025b2cfd74230a5ffafc0d1233972cd662f656bf822c37a8c792d5d6524fa19 +size 87839 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eaafbda69f256b08602a0e9d790a0e00d15bb173 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e552127d85d2c6c98f49060bc70c59e0348a68522f9fe0e40f626ac0c773c56 +size 49881 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95d302899eac9034794b87c92eb7bf4f8e976db4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad9411fb3c87557f5fbd640b0f0266e6dde3cf2853f49949d81fbd39abdee9b9 +size 31595 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55e52bff1e823a8284245f67b15717cb59b0fccd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98dd01ac7e89b4f38a22e5a8350a40a071b7320da37b328acc4e6759aeada5a3 +size 145238 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..638b980bba68d9406dbb3fc5b5abb3b8976b14a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb695e7a871166e5d9b3696ac39bafd7b74eec71356149ab47e2294a9035d9c +size 44488 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e71d25eec721efdef2869655f444e870e8067d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b97d148a66f466e1ee6c2035e414283b28eeeca2416ebc9bed9c3280d1020535 +size 54415 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e75a90c04830172ade7c5f50b2fc53acdf0e94e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafd4c9915fcd4210e60eef5ff7752ef8d016ea5019f2bdc5513c2202b5f2510 +size 92640 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b02d26d309f8613e710dd07123134570d1a5a942 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a4e84a6e6eee6d186e1b9e406e3798e6b0bd476de720d6fe4d260dd93318e3 +size 56996 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38cd22af5b42ef36034bbaafe5ef144cc12fea6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71c4b666862d548671c8e31f3c46de4b9b9fc4b5395d92cd4dcaff12bbbac89 +size 58367 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75640ef83ddccc41885232daea10bcee1710cf12 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628aea8200805764aec351bcfaca682e523cbeb406e15527451273f422ce0750 +size 42718 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b22ac5a61dacd6251ed6e2e06ef408d22e751bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e4512c1c83a3a82bc83f941de6d60aa617fcd73e6d06ad9b62c194c79c80b0b +size 143769 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2d58433f4a839beed5531593add2c88abc0e58f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52487598f5b21208b13f3f5d07f642e49bf8d999826187ec4f17ae365550b198 +size 71019 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..579072cc2c91bfe263a6a637d0368472b07f3482 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b793a1d490d6938343d5289ee8da0ad7d54206a8337b98e8b2bbf74ee1d4dbb +size 161873 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7dcb7112059ef94c631ca594b80aba5eb71362f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8656d4fb4313272483cb3ca3eaaa5cf50b6f54d7997dacf5c4f5e7fea6bdb4 +size 209273 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09b7c68cb034e12da95c2331e6c550f07656bf5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e717277b44fceff134a269f8b91182923b255415bc13907f12f93386797e20 +size 49869 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83e87cd4c4e4c92bbe17ea1620e28604c5818259 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99f46005ad1f2aaae402210ae09a843cbf036347b5516cb17877f69f0c5fd8c2 +size 31506 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e42468501aa4e71bdebe06692c3856d9d8a8514c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5ea35c72e7b6da336b002e79a44bc8516d100751b672e3d05afa0bf33c67ee5 +size 36038 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ddee8713728b3b4c2c1a9f58a8a9de73dbb6fa4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a234988263755893293e46da2f0515b3a4851f31dac25d662716855582634fbe +size 29428 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2606cdf9f3798010178348013fd08ee444fe2d77 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d64e64ed4681ac2bc59e45eeb69436584e0440c0028dddaf25679e819f49197c +size 40349 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fa4df9ab4008f85deb067fbc79a52d822c74f7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700c651c956960349dd27be04b2c1160d07b1754a55e555521b95ae695252d02 +size 26998 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21cfbbffa28fd1b25e976f47c8b2573c4a2a1eae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3217a472b7b7a1532c1ed185fb39da2786d2931a3772b40add71ccd0bc619b +size 21682 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbc3c79f6cffb14eb4e6b51a66c36929dbddb323 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3c52e0505ca32a4ff199afb8c8633260a2d8d3f79d970db4cafd8cebb5d363 +size 57347 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8e408d91a8b3ef95dbfe04b7ddd59a9c1618039 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8713ff089c8e4699b630b63ec1dc830ccfdfdfeacb01db192bc23d3e4a3cb15b +size 22215 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..183e85a5cc0475ee73abcdf35a747cd218e92ae6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b30e412b1088631128f3a69b0980dc29b24f0dec46e4a80b14842f2842b4515 +size 169022 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec9561d3069c6748fee50f3848c7871a5dc74313 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0724062df21dc05cd5ca1f3f986051cdd6d7c7059b052ed6caa9481ce8ed5122 +size 90176 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80fd30b1d0b6be32e35128d859d912ec0d0b4497 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beae2c0bdefe4450359a2a93cdf21c938c5d481ad0da5f60e410d76194deba4d +size 153707 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8cc17bfb88e031deddedad51d949bf77e70c639 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b2c0266db818ad6b258ea62e0a8a47948ff81c0edeeebd748aeed7d0440460a +size 79363 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35c5803d522ca3aa31b7e24e3577b3b19b7baa01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f6bcf0e25a10b10ca75ff01709ab7327130e4f7e082b47a7e51b09132819c8 +size 74257 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4a3b9261bd6f147bb09ee7857ffceaace323f04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27173db86feb765285f0f5d273af276b159d6d2d3739d5265fbdc7c818c06c12 +size 82452 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3958e17002437128c2ff565d94e1d47ae9d83791 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1909a9041ebbe09c2aa187bffe84bc61a2a0da3a2361fc4f0d7c05abbf11a3 +size 89862 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c38be88d623a2dc7b8828f43dcf0d3c0732628b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba50a468c08c31a8d987b495a8aee870c1a84c80c14878b8ffd13f929c2428f +size 1003867 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0808253d04c68a021798757f8f9dbddfab3e779b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2987c2f447d05a2f543555b9e92cbd290a2ebf4cf6259a228792e6ff7010ef2d +size 132745 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba1bb01c4686cb3b375a2da69e686d3c687f2caf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7d7ad21bb86ca285b2e7033fdc68505ef9675a934b280034d8c7552ef8e5a2 +size 182837 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..791bd9f7e1ad24258d359177ef4b6815dce3dd9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38722c3c5e5292da9066f0801434fe2536736bc14723b365991f35950c835ed +size 27521 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..550fb174c41b1a2a460aee87ad95338cbd58dff5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a12e2e18483ff53daee80dc57425dda00e4cce588c40228d3849867ea3163ad +size 120295 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebaa3f4f9a1f5ad41bd6bb3fef2ced66543215a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:296899ee59567b1234450171ecb49febdce90ae7d9f053555f09e56d8c9a7f93 +size 57946 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42a50eb9957d6b26e96bdae2107ad8c4923f92af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17769066ec29f4bc9a92f5686e5e2d61723453d705006cbfae3af75bfbabc504 +size 25432 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..530ae956e1c83c6bffbdf4b57511d05225205bd9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997dd79f7c7148332f85115d7835565a5791c827b0133da7b6d5bcd32df3195b +size 39420 diff --git a/eval-results/mmlu/0/ckpt_189/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_189/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e07fee966c9144f6e849b0af2ec8fd5320ac421 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82c68c8e8ae26fd901654dcf8e06ff35b5ffaa3b71a4b537c7c9f5844c9a24f3 +size 32906 diff --git a/eval-results/mmlu/0/ckpt_189/results.json.tar.gz b/eval-results/mmlu/0/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51e3d96e0f451d2dd08edf493c65256efe377e6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5ede24310bfa11a0cb20203309e52168c23ffac2b7c692a2895ad2c05c4b89 +size 7611 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21c6e2c749500f731bbcbe255e4f323aee50da46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a05278d9af4f09b85f548f738340a9ba5774eeef1f99ccaf5e507dead46ddb +size 17000 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..829b2c30062b9eb3ef2b7622e505552953ed428f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f8e86aaa0c2b8968163593faaaea80651fff17d014a675a3a552330cbeafbcd +size 29784 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a98061ce7b987c85286f8e25d125bae202ddaf2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5582f04bf1a02a28058342f25510e2f4b11d5d2ab38dcf846f68b4d388b02590 +size 39813 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7adf4c75f0bcd4efc44945fdc87ba241c79bc73c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f23456012dbd604ac9a0c8038c78695940784aca711948e76b979246650e82 +size 26740 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1714d92d7b1a94ccaf61c6b373dbaa78a1da2936 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca96506802aa8cb862c4d3c32522041006099955b1d7724defc67af8b5fe0efb +size 61215 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acecfb96c541b104cea2933f9341bab28828714e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb355127d2251614bdd0b28ef9996f587abadb4513a54b71a527be261a9c73ca +size 40406 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19bf986d03d0b4599ad98bda5548ab6d48edd821 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b4fb2e923cf124d668b096b4dbb88c5b70125a289fc45b280d2f932b67f324 +size 23787 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..626e4d7bd97e77f72baa3d7e0f236c33d10bd80a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3059838a9d626fcf39cb12fd063978e24c2fff83fe831d52e9afe634f8b00b5 +size 31092 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2729cbdda002df47929f29f3aba3586c962a0f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68e2c2acfefdc760169670ddbd8e446deb8f7bf181005c0852aabd04385faab +size 22923 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfbb028e25a4da02d7fb6d3b752c121fc031832f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf2aa028eb796bbfccbc00ab31afb68a258c4e3e4f870070099a99a6511298a5 +size 60883 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a2f90b163ac815bf81fd76fc65902e36596ff76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f346a49675571dff9deb33abb22c8c6df25251b336645a879369f1831abd417b +size 25662 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19148c78d51548e47fa3345f22323991e71a21a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103076f103cb7cc3008becbd4c0cdf65ef2ebc600306686e059ae6b69033ba69 +size 25796 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6d874096c3133427cabd066d03c1f38d9208c10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3a729b41881f1f249e6f1bce64b3ed692122ecef2e00472b8aa6c3e47bd3de +size 46444 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e87a8680bf2e047467184b6910d50e24f12f64e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a2f39461730679c79fd61c271a2dceee7b3870fb518d466d005152061c311ec +size 31496 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..193b6285372a1a0ec6e78a671a062a412542f840 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:543829a417de73fcd1154aba748ca9fff339ba055404b254ba99cc7942ad043e +size 28772 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40d2386404a7161e337b4d11c8f69eb03a331a11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43569ec58d490eed4986ebceac0938f719e0728463a00d4e4cc17d5401cf1649 +size 74481 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad5fee9142b98de7eb606741b5385c508b1d663a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5226c986bcd50d83ca5c55d01c58c1ac2760fd31e038b41237f88621479840e9 +size 30039 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a84e88287af8322a8626a5a4c804723e61f69327 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ce49cf6618f841e5643525be50cebc112f98e9d0317b1015d734328fd2af96d +size 19106 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..805db96c5b52caf6a25a56ea410596523cffa05d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947e19e0193c1e584c15a8a10a43980402544443a1b235005a75d85dda24cba4 +size 87992 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05dfd0c08e9583ecbdd084e80972ce46e2f28ab2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c308dc284dc33cdd3f0116c15e9bc9a38183a91959d1cb10757f8a5dcf0d3da2 +size 49889 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c99a75246e6f76ee91af6da6af7c2d54904bd6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3710161328b32abc91781653c08db245cfa76d8dc7b4cff6aff6ba7f1d1fe977 +size 31606 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04c24e347ec58b57d43cf81ffe0fa151391b9d8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bd28287c3524026fbaaefc6297cfbd574a7455851c640ebe395fb35e7f9cc6 +size 145349 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..562d2b30864303ae5446aa6d505efdf529d0cd88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a342a1a1f8e2ee3b514137d99f7236af916ab9db34fe463aac419f83fddbc8 +size 44565 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a68f5607fd45c79d2a5238489d4073b8cd0ca970 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c538fef04d08e079bf3b25661a720d68d7374bb00a7ebd81aac8bfc46ba426f +size 54342 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e5bebfa51c8f6562b2cac2176975ff1b5274f5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8aa97b67f413e3f483121020e01d45ed6cb28494c96cab4ca2290b87b28a6a8 +size 92783 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ace49e18b063d4d1d1a47b34593d554dc65dd15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82abb09c0c514ea00ac72af799aa7d12dce4977a23e63b63071df2a37ae9b622 +size 57003 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7abe796cc4bb2d6643388f54ce6ed8283be91e07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1be12432150613b9ede1b68499f2d7a4ac4ab6b4c72c077d6c020ec1a0feb2 +size 58404 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..725992040de8866f5d81d72f935d4261caabb45a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8177471c0754daa61beddf71793e604c9e3e7d87016a78acf08dfba5cba75d8e +size 42820 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67f6f8d18730743935def6cc56f7b24e9b746589 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32a0aa8e6094922c57a10912f17fab6197e2b490ca2141d9dc6a8951ce43118 +size 143841 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..304bb539205371ef6e0d3f6d5189a1aaebf161fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07ab5d2dec0ed3e8beb29fb533135028226ffa22c724f318d2254d2797a0d1c +size 71091 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07a7fb5acf25c4bd14a86bfaca1e42b3f85e1a25 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab86621d35f1dacf766965363c9f590b4bbda6f54f58b9f00a6eecc179a5bf75 +size 161917 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9eb00330dd0f429130d00e9481d617bb994f9989 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36df95cb2952b20d09fc76795d7df1fd7ef899919e3f2cf5ba632619b35a53a7 +size 209430 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de6d5e1987e995eb5d0e5706268ade0e13ac844d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d606a58c9583bc4e5ef4c28bf3458caa0496c0749530525f1d5aff3e63e00a9 +size 49809 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7325457fdb673e284a4261effddce9e6ea613cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a17deb311f3406af231504a67a0df88ca7faa655caef8c7aa9683b6bda08c8c +size 31526 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e188af10b93a3be1001be65647338c33c14f293f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4bf3c0c6df3eac68e95fffc714e7e3f7c9b7f0742d6e5070e5edb4d5292e682 +size 36053 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b303e065e570f3a326d8d98b5044550ea47c0040 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f257edea86bc94245ab31dc9fbc4bca173e630ff98e879aad483d122c120d070 +size 29475 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72c48f19fcc673865c08a54387eb95aad0def503 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e333e150b0bca8da1b28de7804b6ebe926cfa0089575b347dea3b1165e47e036 +size 40297 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..788af4983eb25e8576a8249088176a6641949b09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da198202a1aa62c614a0856dc47dcad5325dbc2d35649e02721e5cf1687fbc0a +size 27018 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e805b752945de0526c065fb7c41a8304e26279d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a308bdeef6d0f54da46b95ec09a50f242e2a18fd9b77d0b25e324b2632811919 +size 21717 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32ac0798178e9f74194de2cdd6d5dffb48a16778 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4386b8978d5d4b457cf99322c34ed5df3f513d350b26666b6ce13b3393978cea +size 57363 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fe8514787ac6471f7f374adc321d05e77d686c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c194816017f15ed6e5b27f80b3d5bd2a37cb623478f83a15770246556ea1cb92 +size 22244 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20cb66d86bb130307cbdcdca998b240ae30e85b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5369b51e5a9dbf53e89399f28d02c07e7178f4f6965555ce8b8953d1d3fed316 +size 169083 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc408891dab90856a412e484645b372d3b36b19e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439ce79929499ff92406c2d5d8b0795afc4a5d648dc7b89be3e70f50ba648de2 +size 90145 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10da26b2d5585cb9cff64ff28c267e0331b543e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1033ba086bb2ce11dac70f08195f0659d4b82ad4c730997dcf3d2336ce33efcd +size 153647 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b500275a324097ca018ae4546363c13a65e2cc4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b7d5fe56b0ced3e7158c3958912c361cd51dacb3f663c2c283021c14bdf564 +size 79387 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0957c67cc30bc706caa4c9e47341c3b6fbacd6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3dcd069366dedb919c9e895f6b934568a5365877a0a55c3001d7fc3eaf394aa +size 74233 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..421555721da3d559e3b9a9a9b171de70fc7541fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:730900627c7d4b851d4376e2b8cb0fedab6fbd110c7e04f893b8efa1565b61a9 +size 82507 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d90d2e2e1d423b74a34c2a8ed4d5f351b1f71f11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c10d2817c8982aff1ca2f0dfa728069031e024252dc48adb6068fcceb06dfd5 +size 89833 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ffd753ac36696d864e3d96ebfa6f6f4ded52489 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a4c744873d6cfb4762f99482751f35e33d879a57784b3857692b2a40fa36291 +size 1004089 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d623da282f3fbb22cde1ae6251eac7bef3a39438 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee69d853276bdec938cf29899ed5d9a447d83925ad9527ceec2cf90eba5d4c2b +size 132975 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c1bc376e449c1ada594964941c604408ad5d297 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edc06c4e7e613b3b9426e8da263c6f7ef2bd78d9d649cc710de2b0702d8f052 +size 182889 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f0292ad01b9c20f6b21025fc4960b4131da8d81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2493eff8e54bd4cf8448663ba2ce698a69fafca69f9516c5206b939f05723634 +size 27494 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85b83cf99a703fcaff5f3c83993d17bed6698d6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf619d057ab1d36b3b65b45b020a0befba316a309082c44f753273a82320d0a +size 120450 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..081010746f238963183952c43e3cac270cf44ab2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c35fdb28175a3578ea2475bf0d5ef43bb476e54d25d47764bdc2aa04e6d5da73 +size 58008 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eda6c69bcfd807e8b6b3d578e7c662d392f855bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5681d463047b4a8810e0b3ab0c7f64a616294126a59bcbd984c6bb745e871234 +size 25440 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..064a4d0bc81e1e3e49b55d48af91501275448ead --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ce4a2ec4ef675da5b5e1be611163d9710d82a0b6583a833336269783191235 +size 39454 diff --git a/eval-results/mmlu/0/ckpt_192/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_192/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8630912674cfd4cccafcfc792598df2ef7dbd957 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:419727b0038b2334dc4c57c8222a25b5509d398d9dddbee8563d2d57f2cd949f +size 32898 diff --git a/eval-results/mmlu/0/ckpt_192/results.json.tar.gz b/eval-results/mmlu/0/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffc1ee858fcab916df496f978420ecc34c602c81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1b1a6cd4c030197ff9dbf2c201bf6abf2fd6d1c6611291e0563254ded4ce9a +size 7584 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b5c969580c2ea59de9bec451af5e13f169ef629 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95420509bfc59fee77113010bec383224d6ca55ed9c13f0ac94ba012d38f0082 +size 17075 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2489cef9773a5a033d117a3771489b6249a43144 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e74196297dc1ad6469771baee1ac0020fb0542bbd65d892aebfe52eda0076c1 +size 29805 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..729902d6ed18495b6a5daed36569c0cb83043217 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbe37329f6ffa066693e22dbcbb60681b28bbfa91438b9646adf31bbef5fff7 +size 39844 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e2e54e00c94cc4022e2bc46f0cbfc3d3e31449d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606c80a70a7726550dee5a13a741f288ec7bf72b71fa925dffed8c42cc5c4f98 +size 26775 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d9138a948ae443dedbef224de60468ff4cb84fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cefec76590afab0cd1ef0b9e177483ac26b4ce2448f8a2ab544dfa128e012e8 +size 61247 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cffa7bc337da2c1bbe01f5cc3668ecdb42351e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4472c7aa6374ae7b6a5f883ae17926ec1c49b26543c079876cf9b3fbcb8916 +size 40381 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8bacc66afa5b08c3b04a9ddb2c6c0d9a8122bea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe758c6009c0090cc7c382afc64a39d4c1d4b49e4823f018f3643e044f7c0e4 +size 23767 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8e1baafe3e6718ab9a6eda2edd596a5b4d8b4ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206d6b94ea98beb249a88ebb07efa1d43a1b8d0b689cc59054c19839e8bf812d +size 31064 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34632ad108bea4c670f6279c91c4ef5e8fd2fd4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746791ff036dbb588f6f919b5b8a395cbc2107803102e7f37e2381ef3ef0a2a8 +size 22926 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcec98fdc1136d5db9e6141e8f56c26952b7e477 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd8f6946d9ad6782c71fce20311b0cf52701efc461747fa0fdfe9768edc6dcb +size 60973 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47e8dbc4f3f47b0548999858d52e064890d72fd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e25caf43663398a93ad4fe550f4953277d5c03b72dde2d30642e7f68b16b54ae +size 25722 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62bf28b9389d683fb5dd9d4521c635b20bf4b4e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d78459570739ce8fd1a8f444ed4e1bc106972b273b184d2749865d128bf956 +size 25760 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73bb05344160f21db26058832b4be4661b5d94f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db8be601e943dae201fa453646c00a824fe4e9c12ee79b74162f8b23038cb164 +size 46487 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd480390eed75acae85ece1309d6bae78889e422 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c10f810c9bf5d829d598f0467ba89608b2a015fbf41803b1cd993e76026f91 +size 31491 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4738e3fe82f126ca4f3a2bd391ce4131d1c9a69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83868304f208c4c3841fac5a5cfce760dce22688fb1a2e8237da6067643211f8 +size 28772 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9eb67fab8a3e9a784a35f10f638567e5ac5ab172 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c762bb71cf5efe1912c9c17a8f419d9927135465e40aa477233975d3225346d5 +size 74621 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5fe653b35f3cb822db8db4bf1ef4b22e91c15ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91ab65d799eea1d6a08ee5f8dbb3180a20527e34ca5ab131cd6bf7f6e055be7 +size 30065 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..453c09cb7aaf11bc4c6ac9abbba5e0f90eb48d4d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d173159dee555b4d172f38643d676346d3c0a2df3d693e67c229b3a1362eb03 +size 19127 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..711c42a10d67cac766fea00f7b19e7fc77d9f597 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5889232b84f6cab8a90002c8f176aa09667e762072f974a25c4d711ed7c8d3d0 +size 87930 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8a32193c6f6ef1a8cca8c5f41b9be801cc59f88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68b5d3f79ba65a24686072af79f44078a1395081610c59a935b0bd2433f0bc0 +size 49996 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e90f208c28277996320f712ba2b75ddab8050f9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea53430c7a252c0ee525e427d4e1eb05d98625892243f8643a57a4b5170ac22 +size 31622 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a66895d86a31790efc8d962d807fc980d27cff5f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7993766d87cec2890db3c26ae3fbddc80962bd03d1be287c63a7e1a28abfe3a9 +size 145365 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78acd15ccc8c177be3268b04da5776f36137401a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe81549964daddcb26616147f99b3e74c067a7bdf1a8ab9c2f5a6145fc157cb6 +size 44570 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..087f4a1cf782346e1ea1626193c713560f83566e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e2c9ce07fb9888e12ac5324625438a92f3439bbd524ce87550a7bca2d34346d +size 54369 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df6a9b3c92860432d3eadfbb2027ed760ce846e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20674ccda83620c0c72f41b75207a84292fe312f8d76a7d148184ea40f648b56 +size 92726 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b1775b1bb85388b4d974eec7b5b4b43bd9dabf6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62120cab236748361a3b8972485596e97642cf6bde30110dbc2d1e2f2d8e45c +size 57074 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..946118d196cd040ac876509e0078d87fd8662d0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76b3cc5abc5b6b978c336e04ab2e236821da1fe205fc7caf008e6ef1a841ea61 +size 58455 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47a7774e9e8a98a990ba586e7c6f0f65c857e056 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0074fb583c3b94d1786fa76b83ea33948939de8930f741768674fc1bbd0b03f9 +size 42858 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f36d687868e32a534b5f464b7e9b32c9ea25f00a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d840ddd237c81e2305aa737af18cb16dc9b1ef08167485b07e68aec9eb5d05b7 +size 143879 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88bf849bafa3668429748b9aa9b8297dba5f6526 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be2629cfe53f31fa37054abb726e8be30ef01fcb5629273dd280bb0af175af1 +size 71133 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab16927bec733690f0abfe00848e1943b2ea8eea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7cec1f6523eb23f2753257fcfe59ba671831af1f98b836ebf5709efc9cbda7e +size 162073 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80aaf6f43d84e6fec19d5ca7039ad43d03def635 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87e64bdd0ec8211c29f2d22318b36d63e6ea127eb329cf5005c4a7efd1c85980 +size 209433 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f47d0f97e4d4395984b85b7bb8defd66e3f5143 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446dbfdaf79849146b8bd60a7bfff9f774b001ffb74ff75c1884a1c61e66ae76 +size 49895 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70e3a37d1584d8bc7c822555a1f9fbaa3af5f9f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb899c4a6387d9550c26bcfeb0604ce19298d36c3d8372ba79d884fc9609188c +size 31515 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2bf86151f2596c611c0b1b6e3d5a85579812f789 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3e2160d41d797f64a1f04fd694e38eabdf1d1124a96988a43bc41012c83f9b +size 36048 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28b9279f817c4285576e15cb572e31ecf5342b7d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26466ad2ae11e861bc666712c6bc6b9e163953d560b85e1d3d216a9facfcd3a +size 29483 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3388e255a87ae6b78b0c0d07297c814f33856d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896d494e94497f3d3ac3840f44969914efb1c870e33a68ca8d0f77ac77c86b7d +size 40323 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8455b82665fb403a56b797a572fbdb573ee16d13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1d219a2c43ebd7eb82ec3f9e2c222d8ff534108c6dcfdfd59928101e8e7328a +size 27037 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4113fa332f7d8aa7e61031c8fccf264fd7a951ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c96888a30df3feefd245184f8feaf4aff88da396da753b0a90bc51a00073e30 +size 21716 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5789b4d695bc8ada8419fd4f549dde2cbfb3ff2f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f88e72392b5f6c2ff0dfd4b28ead900a22be4fb1b81a2c6511019a86abf3383d +size 57421 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed6224b03945a3f5dd34531af438b35ef8b095e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6798ce0a7a78fd77acce8a05702a90639c19c14b2a87e777e2c0ac4ef584aae +size 22282 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72e7adfa5c0002b4f891adc66eaa4159ae9324dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fa48e6139e1334483e37a882784dfd7d57260d521de98701271416e3b73e105 +size 169059 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4850fcc358b8ff01cae0a1d01db8b0609e30d121 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adef0bab6f04acc832c696eb3d1da32ad4366659cf46ea8dbcfa2894fa555ec2 +size 90201 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac4b97eb528ad2d6d3fac2c42e25c362fec6c177 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb530b60d0f9c5e632763c2e029ebd9560d0d94ed41b8d9d947733f937b9a02 +size 153237 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50713b3ee3a56a751840f91f59363937977590dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e7e88fd43a33d7caa9eff13af4ed99ff56e5c50b60c44a4bfa80f0d1554d851 +size 79475 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b683862e629f2a50057652566030eb99295e4e21 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7312c9ed97e84718af0df629ac89ce61f0d9d34882c0ccc555056633de6c54 +size 74263 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c85c48dfbad27deb580afe3223b74e61b240a15e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cccffc20bfdd0d48dcdb420d26fd02aad672ac2c0bcc2cdb7250f0e5ecfbd8c0 +size 82502 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc0d7993c11e2511427b8fadadd635447b6635a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90d7c70229f483f161e0b86ae1a6698587a61399e926ee9353bb5aed5816bfb +size 89932 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d07822be14b3717a9acc484aabba111ab8b2b9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f879b3032ecba1603828a3861fd157dfba468708d2cdaa0e626b0b8e6359ba +size 1004719 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ad1d46b57281f3cc1b2eb4d1b8d5b8ebe017ca8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f15c238d486b847ef2db65ae83c11506d81a9f2a27c33247c8dab06cf9efc3d +size 132963 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f319a69003f9e818b8d1a7c8dd0e81b60c7ed4e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5d08987e159e87dfe3fc11218811e04cd710bdedff861fa42526aeae9a63bf +size 182974 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e696955534d013fbe9ac77f8415a636ea60da823 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc4db8fc47c4ade3b77d0df72922945fe04ed798a38fd4205471b0fc35ee24a +size 27527 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bf1e6aaf2fddf1be591cbf6d30e8ac50cb1955a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b216669711888940e1a98b88bb8de625443ecc3d259401e4c39b0ab46a4972 +size 120438 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14cb808ce698d819808bf1b653e0476e337cd70f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d202d6121b17309be1556eded997608321030c3fab5095f65a855de5b7d65695 +size 57985 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9380d24560a4b3b8aa1a3378f8237926a530c912 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f08caa1de44660d41cc0d5cca52d790c279f2c6c1205c90b837aa84e957dc01 +size 25409 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..713df04f22e2e91ab2551872f8195068f092cc92 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a9e19209e487cc9da5cd4c74897a23a0aa5754ff71003e49191224ca177690 +size 39462 diff --git a/eval-results/mmlu/0/ckpt_195/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_195/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a305a57e7002ec7a93c759fcd4042b7ce94e2ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34731c39142067c8bf3b6ed7d19efa541cd40aea00ea87bbe610c0a6d6e13f03 +size 32958 diff --git a/eval-results/mmlu/0/ckpt_195/results.json.tar.gz b/eval-results/mmlu/0/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c34be88501503e50b275fdef54fd8d88edf8f55b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d163b99034a8e8f56af7e987364b532d8a0ce3cb99af25765426fce6c18ac7 +size 7615 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c93e7d355188da1998e43d76cb0eee94ff2c42d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf933f8161b57729954d3955b3ca4a3c99f81ae18d484ad88100d13bf4334681 +size 17043 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66de018f3bed4c1ccb532483e4e306eefc3ab707 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c962b7982c3653043ee47b4e46c0a03abb0e18558dae475da3b6dd09589397 +size 29793 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afb80b81259cf1a42d661718a9a9401464f5d92f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecfc562bdde331545a863e3816282f139084e7ccafb7a389a97863fb7173e30d +size 39813 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36fe0d13c254b08364d9670228d8276e91734923 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e8ef1d34a339c1e79e3e084906c747c8cbe5042d3336804e94f5b18bb1155b +size 26752 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd34ada513b82de497d9c8e57db4935553cf393c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebab7be3099cfad4db337e2a3ee72f8f698e54cf7e5946299340689e5cbce93c +size 61196 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..710a7f1c9f528def452c3cd41b3c450ecd3c8e4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b54655dcd8fdc4bb129b5d12c81c32a289257f295b8c460f80540f0ce583f5 +size 40363 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c32be736e25be71547ca78a89882dfa72028ccd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d577f7b3994174458000fae59747e401d0079272d4afbc1d92dcaa02f226cb5a +size 23779 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90be2212ad50b2909bd4a51a2cfd7461787b74b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff67b9e73985b9818c205cbc5e34d6a8bea96bd9a5abf39dedd59b3f3f63e6d7 +size 31075 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bf38d5c65aabce847979972d5dc732e35dc2a73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a82db314304148a25d107194234c2d85e5bcc3519ea47614127fb7b362718f +size 22908 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f54015e0e07ce3e48a88e43a38bf711d538ba43e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d29c6d1ff75d463566d5e433310611e44183897366ea6bd1ffdc2c1a93d493 +size 60918 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94327fa999b8ef9407369e24be497ebf6fc7e505 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01201ed24dd739c5a9ff5a7ad9b10ffd2e40cd4fd72edaccb1c006f4acdac055 +size 25740 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ccdf68dda4370f882453043442a126934189d66 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67b6b4067f3c448bb54032f3977dff5237780f0f7f480c6e82e5765d1bca682 +size 25776 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa8dfc438b0d5bedd12d8eccc0545688312f7989 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:171da7070615a59b6173b182290c26e0dbfb7ea2896bd079021168ef077fd8dd +size 46484 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38bff3523a9b0b295fdc3c0463cbf06340dfd394 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:151f5d90d26d3c0dccfead3b9265ad8b0ccc2abb25f9535f1870f4024cbaa6f2 +size 31462 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be4136715ba4f49e8f6e27a5c433eb81fa175225 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa9384ede28063d8b64a22599db33d5556b4bc7b1f04d15db0510d9a2bf0ddf +size 28757 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6299b4d304f5fc7f70d7601c5ae43a44ba01c34a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5081a7ed71fd0a8109564dbd57b8c8ffbe8bcc8532aece181dad017fe462e195 +size 74647 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2656ebca9e65331ef1df1e2947278e330c451c38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:144edc32b88f03e348aa59284739a1cb82ad3e5bb8b1faa922aa8779c2dea466 +size 30076 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d58212f3cb22527f5f9923e31f63c1768f59a459 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8199adbf1d1c6ed67392711e388bc6cbb3d3e3a70d8f39d5898594229e9e8b0b +size 19051 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b5491772646672726ce36bf64c07862e9346cb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ae4a2f2295e4fe4bb43637b3e8fef590a15635f287b9743de2f060afddbf59 +size 87984 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..359411c9432ff2157223ffb26e8645c00c43e801 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c4792cb40f8d9c8cf99c7d635af3f5015e5a5367828f7261a627bffcf5cf00 +size 49966 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a74dfd5ccaa957d203556db816207659f8a75196 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8213fe200cb5d23d567b4d35ae6afb62c3b1355911ad81653efdd23c93eb0ebe +size 31621 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b5ee01fccce496110d2d46586d5bca1bce7049d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382542ea95fbf4d7b4a9d5e3ca4d88a58a973bcd7fd77b407e5433de8a64df78 +size 145388 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ad58922452dd0f1be6e82f6d612f0ddbef07386 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb94f0660060a861f61db913b69548794db8f4c729e5b1453f9828c5f78a0579 +size 44513 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..feae2c38f8fb0740b0f6e27c01d2681272eb748e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad997087eada4b3ba602402537a948f80c8ef938dd2f6810e1ab41538c847793 +size 54330 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d30a70642c1e017e1c8b1047e3c89a76552a5a29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb4f552265cb8edb8cd31e9cbe126bce5cbdec85fe1f50df048c4fa51c5c20e +size 92683 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d9507c6dc96e77e1badffc869a521588c25366f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41a9fda4cc2707d477a608340aece7ae6e9d1d96cff4d9f85bb7e9dbf4a7e38c +size 57077 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e874052577e91eae867c29be250ca30730e3174 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:278a5f98c4a8acd350fd73fbcfc632780e6f294717ac88c8e516ef9550479847 +size 58414 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cddf0ee0991bd9808928cdf116848b3e8ee1cd13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff25a65abc56ad9f40589c8e7d7567c3595a458787d2f1e4e4196ad1586b340d +size 42816 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..583f300f1735c6e067619fbe5f4dd20074d99018 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a98216e771754767c38a09a23dd6d6c9f781997ca9249d5ed64d03b297ccc28 +size 143817 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66a47527e7efa8acf061f50a0fd0f20b5dfb23a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b212e28e1f2b19de7eced594a392d04525ec6863e94553a3f86904e1a4dc0d96 +size 71150 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7b001e7b43b20fc3b6c0b5874b3a88154f5e30e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9483357d8f2bf08ddacdef8afb509814cb48e76a4d986d8c2d66cec845467761 +size 162009 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c54a0332555df5b591531fa43fe1c6fcfba26cc0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:989f1256607891b410a2689ef33215cb6e1a17eca776f949c386f1f1720e380a +size 209509 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0c7056bc89c1148339bc913b08f3a108f874337 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d76765e5eb4eff8fea962cd9c9a48bbbc7bc3db5412511f1317d7d8f6f161060 +size 49856 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d63162ff25573822a5e848c3c8d8d5b4820639d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f35057f1e3cb77023e434e37a3f72b088ca423427a89c73be86bc9de5da58d +size 31507 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cd3362b580a4fffd5a831c70aa2540a69c521a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ad16db267b0b9b6232fb1f7b436057786a26ee76c97998826f928c2ff80671 +size 36041 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a5badbf6d13b6f79aa48326b608614467292bbf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c695f6a9e978b0486455fd9b88e29ff0e91c7c1da715ea39866d5d16d829fc47 +size 29455 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e09715059bace899aab2543862957dac800875e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f74773d6b31e388002c5160108b902011d3200848f8b84fb7fc5ace24913c82 +size 40309 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30c9dac2e9dad7bf57000f3c05f893aa690feb27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e194475822d257ddec32e45f92fcfca6677b62df4df3db8286b47eb4de9b9216 +size 27017 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47a97d6da20fbf0e83231c34af2be7fd49b7b7cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:decbdccfff9fc1d19e23c0712f0e10edce5ad651192798897f8865b3719950d3 +size 21708 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..850ed75f0620d4cc88a7c2f64c30abe95067a850 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53885cbb224f8d75b180c368e3bc4ef76910df9659ba0e7ebcf06a0139d88804 +size 57409 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19f859e2449c92d169ec3c4042f03ff3c83514de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f7cd4d7f0460e5f9e640f41cad59e09f470b95fe6eca51a2140a56f8e081ce +size 22261 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4622bfc64e9792111e6fb371e0679e9f6323ec76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d2e0e9839b9a196acd0264e65ac01a2e1bc96976f4feee5e33241b112a60a86 +size 169099 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bde1261611578cfe52adac7a34b2f23c7173d72b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a3075ac3843d9fc54a8aa5911ec98d1cfcadd0d4ded2addec346a293613943 +size 90143 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0df61d22fac188005fe533dc87a8eca76f933dc6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cfc363600f58b36ae1b9d4205ce050bb4bba603c4cadffd199c569a4750132b +size 153724 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..413c5ad96d615b520fdf9add57af708a3bd6f01a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:304c2483542712354d958f19b0987ecfc4f71400429a9f4161bc3b919c018897 +size 79373 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b639a1366abb25fe2b2100230fbf06e06e5a1543 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:309fd1428311a73672aade81c9bb3b0c1e18d356497b33a9226ccc2112c161df +size 74246 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..800c91e10062b7aed7b6ac7c6ae3d81a10065b50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490aa8be3689c13718841e638d27ba05be26fc8f61b15123daa04977f82e657e +size 82484 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17c0a6804a1b588fd0595fa3c8396d6cc8f57ca4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503b14a12511deb6c12bd65e175be2f165708c2dc07ff5d94b9656a616d5b7fc +size 89881 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fed9dac442b104f34eec176e81db9f41da5178d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45da3e129fee7534f8355d7e959ec7ed0d543593cb4b75980e92a3c30e787f01 +size 1004934 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3e4c430415921aa67845565d621952163529959 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c753c73ded6b062526d316746c0f4c4a1f0c6aaff661f63c90357ce98f261938 +size 133006 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2e6fb51bad99a4aa93fa70a9e5ace685e98b75a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2132bc815e9c07c52c0c7b5c763ac58e022b430503691ed93f9f1b28ce42a180 +size 183068 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2784ff8b7d127293afb50947448fd1d9b7a8daef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5858a53fd46051d6c8c579af6a4b1bde06239d4280d5331ed80eed011a6a03 +size 27500 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..faed488c16e4a6074088321bc40d328f1ee91a3c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed4dddd65fb275dc66e9d67cc0832eca3afdbf5f9f7d643e06f4fcdc7b369f7b +size 120442 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1822df2f61ff35e0038b21ae32b379855b2635aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df141fe3567c90899283408c784fec514053ea016ed210d189aedc33b96fa5d5 +size 58017 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd052403ecab5b727e4dafa72d070043816833e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:becceee12059de2df48ff333fba8a4c187feac81cab2f01022d5881481ee127a +size 25372 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef653b0ad202281cb655f5a0ba07e64d2d2b8797 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5186e269695edaf66cd8d9b6aeecbcba4119dd838c408b81674f551b3b23e6b9 +size 39446 diff --git a/eval-results/mmlu/0/ckpt_198/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_198/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9642ed2eb264915902adc88280eb5d855f4cb0a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3ebb70a2ab8c036c2f957ea74b1ee11fba85217afd6090a5f882387a1bf53b +size 32908 diff --git a/eval-results/mmlu/0/ckpt_198/results.json.tar.gz b/eval-results/mmlu/0/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74308caacfc5496402c04fafe73aa2c8c4d1342c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e722733f9fb8d253a014ed42ffc13a98343573155d1c1c27ed5e4f460bbb0cb +size 7603 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..424f181e212798c4baf4ae6eed27c5342bfcbea1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8adf77abd87629bf22e9373067c3eea402df92368a697c8cbaeac7a01134816 +size 17047 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3155fb85ae619aa69d41e23e492a483e687cd360 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1299093c68d12c00050fddea7a0d8f29a0bfd10629b16728e33edc69561d65fd +size 29797 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d9c951f440536203831ba4dff14fe4f63eacba4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef3466c8fc4a6d845d0c4baf93212a17dcf674e7ad0bbd5bfa56bc375c8d850 +size 39814 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..004dbf44d10d5c77f5503b3fe615d271946b2c7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3392538ca736419c42d1669b1b16d517435d147a11ecbeda3b1ee9cac6333ca7 +size 26750 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a0c0e6094586a7941f9e8178b802e1327abb35c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3e77320990969539b780eb9fab4045bcccf5d2373a27bb7ed9c8b9928f3fb4 +size 61181 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9aaf326b8f864ac3ee4218713c566281b98b6a4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8181ab907cb0a7422cd8add5ce38d2ee473cd862bb9a13cebf32fb3db99ae278 +size 40344 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c39694b2bc2348b6080e297cb06115e5113d1b96 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b3bea44655ae04d61046f207c62c1a23886f7d9cd3046faf8520369f8ef8149 +size 23743 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58c03ecb5c3e5f31c23f79cd28204939ab432bee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46cd60c40f072b11ced60f7c1d8e82efe6ad21508213a9774bdb1131be9c0d89 +size 31077 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fbdd590f36d552b1d083665f077952af30457d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7cd952b9c3a919cf5b8a64aa7ef634a9f4b2c660dfcfabc5f1370264238199 +size 22899 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..337457174074802aeb83bb5253391de96c8ed0ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e0b0028cc188ec9034121ef7825a34c0a84935fbb804c92451f1980ed0a53e +size 60891 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0d322401654bc85d30a028b9cb473d7688d8065 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7679412385c2392a53726eec813207179301190afe3b194c8c34a18a0e189e96 +size 25714 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1866b6b4b22109182664f60ec7990c43bb68cf2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6695aadc2f8690316c40b0f4d9ae3c4569a0f0acac240ccd7e4bb5d1391ab182 +size 25761 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fc592e5e29d79dcae22b0f5eb2be1ac7d57a2d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e00b5e4b2ac35a6592170ad903511b84c5ca9fdb22eda85f730e830df20a56c +size 46457 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..930a318877f66f04c10b60ba607f7bf38cb3efb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02281882dd2a95890b1c65611d97dcc984dbaa1a949ba0f6f86c2b72dd0e4c88 +size 31408 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6bfe32a5dbeeb7f47536104b863390e5fc9391d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e409de40575a93161efb35a1d036f85c983e035fcf5dbbdf0a0507dc25450f9 +size 28744 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b146d6d3dffdfd1959a400ae6b585f48d2ddaae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d23563c3e2cf666699c3e735332998a6b93c93961f610109aaae04306c506057 +size 74499 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb246fb36c8d169af259cc4402c3fc3921060246 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e05eab016c5b3fb07beab4e95599696da005355044856aaa9d8128774e325723 +size 30088 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..584d32bb82bca89d484134eb50ca9336a766e96d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3d0c8a970e42a1799e781674500db8e3bb2c543597358fd56276c6edd68a2e +size 19097 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34aa1593eaaa930a8c72f06b9a2eda52ae5c8179 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5a7d64482c9ef13fe96dc44538dc4bbcf8e434e4bd78454301c30c51f35fee +size 87920 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c09ac1779e90d3cc17c1eca05cbe2fceaf3ded2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298858d75178e51ee8c7c97898279a72c015ce0f78afc2b44a7101b0fb400eac +size 49864 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab7588647e27b8c869943e0b687afbdcd29d900b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d4eee1298847a103af1533c2787a5c97f4432e76c89609a4d51c3c5271301e +size 31575 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c985c3d056697a5b3ff818b301a625386e73f0e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1218383fce4a7af3555e34e44f8b9bc950f877eb5199e272250fbead39ff6a96 +size 145319 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2984001cede5ebe6c75ce64935067a9e0448fa5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c771b7f03f86aedd1c84b17fd135eba5203a2b75258f89a58980c7f235da6923 +size 44529 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc180df25cbf2fa84778145833dd63048bbd9e38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee400ebe3af8b1e0d9105ffd8127dc6e0d3294ad98618aa5becc80e3390aa16 +size 54347 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5ca1ae9d8098bdf74f0610248b9a7fc9164f784 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9aa3cdc549ca94436ae34e0f8f7f83272f0c02b51bf6a35e24d3a235200b0e +size 92735 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09cb786083ea07ff8aa8ccd7e8ffe9f4a0f18a03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0673a402ddf44b08c173cb16ef84bcd76ffb082fcbe60c00ee42fed44eae52f3 +size 57120 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..022ae0dd04654f7bd8155a0a954dce5f50f325b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede3cc66815d494a61350d3c4923b020e4e070e73ab3584cfe4658e00d1c55e8 +size 58425 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c73d6011dca650f188746f9de7e58d0f485dfda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc17692d1e10ab6e0fc0e103893d75153f12f0636a91cb80599598bb7bb5889e +size 42807 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1028a3999790e5f730295a293811cbba46189c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3faeba22e76ef895849e74ee5889f50f7ee760e97ce4c4aeaf3a4ed612be19ba +size 143809 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c0ce8d7e8c9181fabc8a0240a817a8b55e00f35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e73f26eb69c8fcc07362f7fec601f107a6943a774ad72fc08a217cc820c56ef +size 71100 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c00fa72fc84bf6edcdc8e325914bcc55c26ca5ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50e8cf99eaa9b0505815922e552df411bd99b9a1b8b0bb5c5a1aa130358a12d +size 161950 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1631df5b6b091235f3421626e7bdc0f1fc3422a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e667432c8d95f2510bf63af118ba67266011cb7a04f1dc7dbb03e566044d0f28 +size 209407 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..845ddc6565c0972be1c0d606cabee8864a077741 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503a34e70c96a30076e6f7fecc49287a892c7c33ebe8958cc8e7a1797a6855c3 +size 49882 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf66ce53371e1d4c0d015a61ef1b93051003897f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c958d721633dc2071e8dd77a02dbacfa5cd6fdc51289dc76702560e9640c3221 +size 31500 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccfbfa502c2c8641726aa60134b365fee10404c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3be263ab21ae787de338f9fed3c5157fe58127bea9270ef279c53355310f66a +size 36058 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe1ec86cb34fb4d9fa53431a6a9221256410d28f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caab3326a6205dd51b9332a0fe5737cb471edcad7f2425aed5ea34908a648db +size 29492 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c285420f6356241bd53bae21bfd2e2b53d45ff73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d56f4c5da8748d4588de3bebf20eb11cb8737725edbd71e3091d977945052dc +size 40304 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0da5f1dfee218b6fdbed879a873dbc0a057a9045 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a9cdbf0ac975b61c9298635e992cd79de296aa54c5aa4a5241ae31d4956a3b +size 26980 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c29e56f58054955225ed40e420d4db8eb0f162b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67277cb8f56f6ae62aa08c87ece555893077e759169d0ac3bcc1de604d0ae19 +size 21676 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6ac8a57916362513f5b5c7fb28f2e716ae93584 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d3dd73b5a578b78be70ab081c55fcbc5f49bb899bea33a32239635f6246ab8 +size 57387 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f66f88f1cc017ebfda1d5a3d970ced3ae6eb66cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa845dad711b0a61c53e605b27807762b565c896a45eeb999b47be7253957a2 +size 22252 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..454b1edd18cd925bec877a3c523f509e45039ca0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22e1c99eb760a6c14ecd6d273176749708a2444ffba10dd304744a868529b2bf +size 169061 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cad1085a933bc36ae6308618cc720c539876377c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:296d0eca17cd4248cd9fc53e8895e44dca0a315c9828c0479187ca97e6349bbf +size 90128 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b54f0b241241085a6f38bdcda129e4f6c87ca383 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e09c1124a3b6c8a0dcdd636b774a5af1c6eb895b73da23c3e3f9fe848bc6de9 +size 153679 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8ecbcedfda342dc327bc8d00a8edc0165bcf657 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5491e3be25ee513645dac4052cb5bccc99f720df3dd0caa101d2a0d639a0e300 +size 79412 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58b1164c8fff4918ad6b7a214ae3f44098b12d02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02357604730f515f77379f5a074edaffba72831eaeef27b260924a96e71b1026 +size 74327 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6708ea53c11d02c45215524dd2d1181e327b1afa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f866ced8cb0c2cfcd72911aac0e2d474892e0a978384f76d35cbca03e9e055 +size 82502 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e768a761a39908d0ff0679a5ab96c72e356b113 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c65c49b7aaa564e4a51034774e2eb70178676d3b5558c59b212ef691325334d +size 89828 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abb403a75623b46b939b9b82ccee2671a40dc5cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:489e4029792069942bc8a58c592500f05b09548d9168a25a934a95084fa6bfa1 +size 1004242 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21ad755f5a356fc85cc48f3b5562e00723c9a559 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8370d8bd14a5582a68646dd21ad922800d497905e750c2dcc63c04d797d36354 +size 132969 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5dfc2abad59cba9ff1978e8dd94b48b2c1a4de3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90b46b2880ec64ed756cd2c067e83a07a781cde7386ed8d7a663f5e1c1928d5 +size 182961 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a77bb352b7349238c3f7351eeb8cb41e196a7c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbcc9f36025ab79c119aa9fc29d2ce0bfc6f96a529ec6d33150527dcb8da3d52 +size 27535 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..097b9d9d9ce08c4191faef91be94ff0e587a2d38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7c6635edc3b9f53b0d72341b424f1599a3a3745be9744bd3ca5f7d54a98fe2 +size 120385 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ccc9cd1c6a61ca2ff6b98b826a03d40a3668b93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d089f2721c38418e29d951fd35cfd4cd988948cdc1ad93e23b29c34e8ebf34ee +size 57970 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f17d50c4557b23858de8984dda2ca253424dda66 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3553a4e33549c20e45b07011b0827ae32940606b97cb2252bda419dd8053ad5a +size 25428 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21339c411cce59fe947bffce20309fb100b299c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e048a446ae7a914177ca119836a9b09664cbc953f1110a499c6c6933eb23750 +size 39460 diff --git a/eval-results/mmlu/0/ckpt_201/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_201/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82a9181ce165587b2c3ef4f8f11206905b1017e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534bfc1af46fa483eac128345582dabf0dd9967a1df38cb0b4480b4f3e91aac1 +size 32939 diff --git a/eval-results/mmlu/0/ckpt_201/results.json.tar.gz b/eval-results/mmlu/0/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a22680b1070f0c8e1d147c98d879646696cd7db6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d64183e67a5e8653e71ae7401a805f85446205139229b5b2491f23dbdf6ed4f3 +size 7603 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85cb139974d2be5e40f477937b381c2bbc5077ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9dd430f6861eaf0d1818155f7eb3d2d36cb65201e34277ebd0d7e82718ef8e7 +size 17007 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c64b67f57905c8f529836f4dd38b4ce1172c3ecd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acb1b98f211be95568ba0591d2c0f6d42774e69a01dd444be9708142657c59d5 +size 29770 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c0387b5c43a04a6ed7ab02e423db3c2111cbf03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73acb9de01451ef6f5f5f812b645dea7280ac30019d359e0e5453e538998c917 +size 39809 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60d6a724b689d6bdee80af8a9e9cd6558b990fba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1dbb1d9509493d9bdf69bd119da7e9c70eeb82703f58b6f72eb29e00c0e3180 +size 26751 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78c556bc63fab5839eb548bcc063a6f1bc6b43db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2400ea48ddfa44bc008848886f0e346f52996060816812f5993debf879b19adf +size 61224 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10da15f10e046f67fa507bbc05d33bf9fc19c817 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b19a4a8528c55b6fc667ebfd66a8fba54b17107678bb74ad7490bdad25e1559 +size 40358 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..904d2825a2a0871c8f7d312f52d9f1419c63bd0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2516ca877e6bacb058e72262decd97658e9faf533e8fea35aed189ac6d496840 +size 23745 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e25395019710718a41ca372c790f60f8f6f66296 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9ba902cc97610f6c3ea93e006f513facc8672b8f98d8f1fd2d04ac2e4523cc +size 31087 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6ce27df891d7414d93cd02de3e0e4977e47ec6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f2d1f483e7e2d93b817f966ea526db0bc6ea01f0d57225c8006fe96e8bb76f +size 22915 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a85e02c5500f4c3382bbe37b6060faeb9728bb35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993b93dee457672e13e57b9d3d117a32720e3a3b56c9a804bc0585bbc9f5216f +size 60897 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95cf763ac137318736fd4f092a8980646bd11c3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b5ec9ee16511b7faa9f0364da9c80412131a1dfc6250873630533f4ed13a7a +size 25692 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffb2c864612e06e8c427e1bcb1149b2fc00e2998 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f5755f9084258ae31c6aad76defdcf3faf3762b32341ceed1c412c90927de0d +size 25741 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5512240824d50c0ef18cd720252561ea1372c5f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1567428bd51013a93f92462a564c57d3750be165a775366bbecf4c954c110e46 +size 46476 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6efef189bb80e7fb658f90b29ebaf48bf03bd9f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df01d6bc48af9659de62509734715e50bc15f9777738fd3b70bd6a4ac43ebb42 +size 31468 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f207940c8fe431599e35781a66ca9f5a889e7497 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e23c29617328623dd041a3576cee79ad0136f5c56ccf4fec6c9f4abc6e63f57 +size 28755 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..064492be03f958a51af8b3af9b790d9aaf8e7d2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eabca4f98a67a8b7a2dbe9ab733fc4b07f3cf003c8a8eed00aab141b493c5b9f +size 74491 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79ff6d0167868bafdd031e7a7e5edb8b6ef826ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efde7ae7e7f17dd1dbed9a5fa44e44ff5ae2dc6d57b7b095487651a35fc84973 +size 30079 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8db89ce04af0561381acc128b676cc4fdd4fe2e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b75ac0c000d34f1b653b43eee2c4994ef15fa57da431aa076a67ec0111d19383 +size 19094 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47f4ea2839886676eed17b2fec28f8404fa98aa6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2138e2fc0ec6a50e820c21b9cccbcd20ae43ac742e8150fec1864e91f81e2a8f +size 87937 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecbb32d9053628526b9d0a13952745ea69fc176d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3aea33ed8a4e541893c2bc0c37fd4d393ac285347c56b2d20d47d6096362ee +size 49909 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64ab06c96414258a21476a0118f73ba5634dce86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9947afe101b511f1871e4ca93ca3815df3722e0e9bda5aa0dbbf74d0c721c737 +size 31572 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..443b59a05eb0ac84f4b63be164aaeb44b45a101b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736702fde499eede939304d9ad49f9e077521b47e1e109296f4877d56fd27ada +size 145375 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a10b7870723952f87843823aa41b8f2895f0eac6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a331814605e958a2d6a5041a9f8e8bf7d1cd020d6f13ee7a65f0a42e1796106 +size 44535 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de77347781769fd4578f83c4b033c05ea9ec5309 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d35fb5780d59604ee08d43517b09d4acc22f74288dc54e87cb496988f683a021 +size 54353 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbb7a4f38e79d6a1650ba46f49d925b3d1ff79b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055b5639e64dfeb2aa6cfa82f6fd9833413dc8b8f034f49c863dbfc4d4c7bf85 +size 92673 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5435396bce6891f50d3b34f8ce5c9b9ab106a22c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78fb18a5f7846c6767dbbea6e30133272c237924afbe1c4e6d8b946caa99f74 +size 57053 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aafe7200977d7e5a145599fbd4efc4e820ac0a24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a914dec7330a623302d6d826203d6385121aa490a3edc0e615996ee4071f5fd3 +size 58435 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..697d3cca0c8ca354e72f703cc455fdcb00ef8e07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f1cc962aa87b1601fe3be49ce6595a6258f86a193594eace76bc3dbc60f891a +size 42844 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f70c47ba4c80abf162640413a6a6e75d4d794133 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ad0ff38e5a62b1d35462c1ffbf1ad3405dad8372606899349c9fa8217128024 +size 143834 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c959431061a0ce0cbb144d6afbd02ce2b9ae75b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e46c3b1ffcdcccb26f1958599dbb84d77d2a1a518cd831a6966a7d517250131 +size 71133 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e1f703c494fb3c1268ba0436c1ff3f017e6de94 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e4d45c3d2e2e195849fdaadb31276cb511869b9540c6fb42357945cff6040d +size 161891 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9549a4178e3b01a1c25bc1a9bac32fe4b27e5f5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd71bbae5366931169caa2848a97409c6ad3ac18a93dada0321a017c228b9a6 +size 209298 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f30aad1c0cd2c14c0b82ac50bccccfc9150e4e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b89b0ec6ac9b9b329fc250c27ce2d4b1063fd3aa18187a2915017aeb215de28 +size 49892 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5ce48502ff4539e8a81c36bfb231c30cdeb709f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c64da1fff34cac07a28bf1e653959dbead424c2dda9fd10776a582517d643c96 +size 31511 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..588e3f5d8c77c5a72378b4c1b2f9f932ea5eef23 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e401d906469725d6277adff38ed2ef223a0d2ea56b3f86855b26dbe27bdba036 +size 36029 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce9e6209dc667c07f448ba73edb54f7a09cc509c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d7d84406dafd36c615aa28a9f097aa22592c06d964a7d9a4d535ad342aafd8 +size 29462 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdcda9a2ee43b6a7c4294133fc57849a39e724a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0ba9a294036a055db2f456b3dce1e930b385bdffff7b8cf4903df76f2ec1a8 +size 40287 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..149c4ee1e6e3b65df58ed6635f1cfd90720edc1e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5399ee67696182a3824509576ef6a23f16acfb9b0542f9d36856e5c1c69f42 +size 26969 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c122ba0b87ceb4fb17a1842d4f28f835ddcb4f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b195c42966e092207b3a09171444ff33ce3aebdc1d22bade55d4251a6d45b6 +size 21670 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90f8a929f50fc8ff6d4538d708dcc31ebfa17fe0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9c7f287076fcbd5b2d0262c006eaafe827d2184476e0478a66c66607a14720 +size 57367 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56fcc96c3485fe5635cf1ff0470d6f9bb2fe6066 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff57a12b5a946498ae7b576f90cb03dff0e26055a8aae9927ec4b021ac7ed3b +size 22240 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c6e9ff8ce787d74602fa3691223a3863fb40042 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4362eec3272762f64f1e7a1e1c9a279f46934cc0bca0fb83c349ab3480d4856 +size 169217 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1f6acb1178f0ce4ecae1c110550d693dfc59876 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc63e5ff11e48a213a6adbcf2d5c647250c658e2d39f179d60bc1f66201c4e29 +size 90147 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c9b2165fd5c737bf977914fade3bfe31e6384e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b878f63f83640df7915bef69a0392486989d65e50e4eeae7942b38f1485bcf07 +size 153606 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e68c72445a2893c50b529ecb6898e5710caeaa1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea893b9302e5d2162de11123fefe9232e90fc56f1657d19c7f22583371e28b5 +size 79414 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a232c74a356e35c7e7bfa2c90516c9dcc0cedb0a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa836e78fe548cbe7e2db31b488c1ce275b77637cd1d141da15010129e183e3 +size 74238 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..306dda68d4e2bd9b06a88b34010e97fd7d3f3ae7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe018be5e5e2f1d759011f8c1c3220f5619ba2932f8dc0d030e7196fda4cd688 +size 82529 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5d49c926d4ee20f2b5ff31ddea238923ee49801 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ddafc9e0f37c3e3de1d0cce6fa15b077aeee8bd9542016aa9df4d9f351868ab +size 89794 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2debcd2cbed261a0653cd2125633c4bc10e8f835 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:528a173d48604c1e6ab2e214937f86ea84699056f23bb3e6bc3586fcdc30eb87 +size 1004122 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50334196ad8b19652d03c72149af1abd3f8f862e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6fbff923721cc6dca4750d482f8c3d0a05fdfcdfe3c3913611ea9f165f18830 +size 132945 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40e3d9ab550755a2fc05f05b72c268e7aa638895 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d04d1e3d89b0e016aa75fc8287f41163e02db1b9e167e27a554b7b3a463402a5 +size 182949 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc06e16926a74c9bfe981ea6984b9ecdb346674e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ed6d5f16da2ab5474bad619de7ea74209932edfa38f7d51a982a7491d6c75d +size 27510 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51c880d49327fc957c9160e5de626d81b82d187c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6553fef52cfa1af870151ee843a0d4a2f2aedae8b07b6d13aff1807ed7de98 +size 120267 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8136015001aa2a53f8bdfe42527d77e598b6af0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5b6512ddf422904d2cfb3b407b657d3a4ad9adeac4289c949a32c17ffdc418 +size 57978 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cbb5a9c17d82e27410b10c2ae3f463b7853e4ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ece3d814c554c79cda58baf9a2856a27066a52ddb5a4f0bfb1df6df720a8e04 +size 25404 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab685fd4288ac9f12376fce4ab338c223a276478 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:198a9588a05e0b2d01a12cfba341c7d4e878136ec8ad4f7d948cad05813e88f0 +size 39442 diff --git a/eval-results/mmlu/0/ckpt_204/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_204/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bae5d69e1bf3ff298010a85343e12d4d9542a3dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f2e426bf6fa9d07033edefcfcd18868587e4669bdf453f50ede84de873ed544 +size 32932 diff --git a/eval-results/mmlu/0/ckpt_204/results.json.tar.gz b/eval-results/mmlu/0/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37fd79fff3d94ec8612b72401ff26697f6669f71 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f314aebd389d6d4bd1c0cc6aa7cbeb6f503683d424ac7e7b33908d8d6ac6d2ee +size 7635 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a11ca33f2e3784df29a44fa0e2ab8efeaa1740e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a290987eb0866d89142051ae23e664debe1a851fb99afd1842830f11666cd295 +size 17037 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bdf0b9984241d47c41b85e6506bd6227a7aa403 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fffaac74ea1f3a98eb1e678dcc8cb7295c5f14ddf845aca82570102fa04c69c +size 29790 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3ea4ef8b93f7b76d94a0d3dd0f8e7479cdb2cd3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b377a08b8b4a8729380ebc33a59ab08099e9fe6879153c8e602b55d2e429810 +size 39814 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..290fb1e6d8d34cf94600c13449934e49a3f16d2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da629847e8ed2eb524a05ab702099a77720c1baaca6bceacf30531cd980e46aa +size 26797 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40eed555002cf471ebe4868cdfa7439638106c98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3b927ff17a029ea8f39214e7dc109636e482102b38a5ed8e7e0be124ded2dbb +size 61186 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a308ace254c67933044b0f59438a28c2dfabcb7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c2c89a2024343f6ad9b87851e0ef6bf0ea0e397a713a47a621e21e070e98972 +size 40401 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80e97253272848ba63289e1bc143dfedea3f430f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2d7d9e7ebf8189c24754646152fadafeb67f0a1fdf26ba025833f997be5a4b6 +size 23778 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39a5fe61c55f8299c69e7df0180b0716ba8fb4ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:229b2c9976ed708cf7b7ddc2ac93cd1c5e74d7aadff302c58b9cd511e9fc8f5c +size 31095 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4036f9021bbf4e1d5bc3f1b97a62bdc2421415ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f872d16f3400830493b3da10b1d02160335604c7a652cb502700bcf96703e8e9 +size 22909 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..841d92c02b68aa83a19593f3f440eb7952cccbc0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b93224d78338e0aba6b0dc37676ec8a4692647325b3debe971df1a5a0341c0d +size 60855 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..402350cec7804f1f64d2abc1dbd47b36602a5265 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ec097676537d66129779742be3983b9401e9d64e31e31f419e523c4eb4a28e +size 25687 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70fe61b3e8e497ed0266706b694aa1502757e71d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207958ddb7befd83e920cd987f703fc902844d746401111a26d3d4e46915dece +size 25759 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbbe57265f38f7ba4fcf11f3a700cbe9bac6b9e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f949471eb4a4178499443b62728caafa8479fab776444809999918e5a98a5e68 +size 46473 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce05c3e36ec20b89f8bf090d3b17ce0396ca2c7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d7f08eb37792b260d89e8145094e961d98670a2e1fa147640509dc507b113c +size 31420 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76f3971b78da15d520e9eb7c870edd76a9c7ed2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb09ebbaa1e17bde855fc31c537510849a25f097be29241db1b5556136a54c4a +size 28774 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9797f0179fe0cdbc94f00fffb574453d94eb65a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf4cc6e2667b1f3dc9480b437464eb0cdff35c899a85df9870392f5ff9d3d1b +size 74513 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4ea92ecceb99f3d96029f1ea31048dbc96e01b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c697df10e1ef918e867a3a8133b82e2ab6d876c4c91ad40c8546fa06bcd93d0 +size 30046 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0c9345fb382a5c5863fb3f59bf94e259f050611 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ffa7b3e0431d82fe27ca7a5e1cff522b14ebefbec459aaa731defd6cd113c0 +size 19088 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0e9004d5fedf240bc8b740525d107830eafa938 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8fd39f606ba4e2e3aff7985c83a50b8106f50083f4f926d464995d4e50084d +size 87908 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05db7d6fc0282f6105c92d31e899bfa2a9398d1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267b76e205902e6e3111f58ec300792b8985aaba211271e43169ce1393be066a +size 49957 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c00dd784121bb366cf66628c34433d3589bac1e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f44a5e9a9c505a2fe9596d6503170e715e5024d87931c9d86ca91c3425056caf +size 31581 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b555ad4c2509d351dbb8274645949ea8922f5481 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f530c3219a97dd5ec007b463830b888e7e78e5573066a5d8bb45d1076660288 +size 145348 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..565b7778f0032a30a14bc703d43a9649fd5830a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e65a7a1eec8628051e63d925659ef8238aac004380d5a70d66663b20c8b3dff8 +size 44563 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75b3473e663afcda8cd4d79497d091dc0777734f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92fb1cbd8a483c71ced00ee7b15000b0aad94938b94ce10651cffd53cd84e141 +size 54404 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06cdb60d38ceda00dffa8d2a4b694e75bde12610 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a5df381edf144aa88b1959a22b82e3391284fce39e9ee870e1ad7f28990143 +size 92651 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87aa5257d8dea9385f88723f7faac333f14ed5f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1771a1edac9b4a0627f596bb186e1042b45594786de1aca8eb4642996136c527 +size 57013 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3a2293342394befb52ca3e415f5e8ea226d9e11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670abc7b83574efa4c8579a7b16b22866366ab85cd61440abf54548b7b271fea +size 58423 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e18dfec0c06dac363680a88bcb075f7adc1dc115 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a017e521d277f85f58a883416de3f7f02b6a87ce36c6564670637b042b9c02f2 +size 42799 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3736ab7ebb2c24d41f03c9c8f1fb1ad12003c0d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8088bf36ce68695924719e56075116f6c799f1e097148557f96cf0554e38f7c +size 143711 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea3155234a41862eb86792a3b770eb7d8e803b64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0217511653eed8e9e3f4586cf655677f22779f37859c0357a9d3dfc7ec3c354 +size 71125 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..836baf3020d46ffcda537ca096cbad0a07c29894 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c788bb4c38d7cb42a6f2e0894cf633f3f93908fec22f5d08afe0f612551655 +size 161897 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7919f6b409bf42c706c4f8f810868adcacf4abe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6d69c515bf6fa6479c2f2a4f37651727155af7a1a5572c6b78b06b0ff5c4f83 +size 209379 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c58a9d86f597ceb2187bc0c742a11133d6b9b125 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d393262602b31b0f1933ea294de04186ed7cc826402c9e15647031e46e80ec +size 49913 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..959f41a7e799cfacbcd97f5bfa5cd409dd051fe5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56e1c2aeae7909f9f8b95cac1fcd3e6ff12d4ae98e4fce2fabd332d8b6469741 +size 31493 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bed946b7b2fd3694214c509ae3b5f0e0976d1bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:920e50e3a0cf36e305e0589ca9771c9fe04d1f6135348533cb86263e9e7d8967 +size 36055 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afcdc27e5aff3f7c5a43d78a79813b74dfe5851b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a2f3f228aa410c3f7fe2fd34b56dfed5db6263cd5c98fbab8f568edb868d2a +size 29494 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93f2a2165d596f5176f6ae07eba9cb82aaae679c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:625bd0ea8889e69a4a09b5c963e9a90816628d4e855ad6e2886ed897babcfbc6 +size 40340 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db0f585a8626f9a7fe997a9384215f0c2037692a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:730bdefd070dfc847ac102ca9c9cc7ea72be47bdeb2f16f59a1c15f3fa627833 +size 27001 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd29b7776e9e6e60e118e89cef82ec9df5450381 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3bb116666bf836bda96af93eb2073de2068cf2f04e6f50d7f8b02f697ef1c9a +size 21674 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..307ce230037fcbda7aa6c89034bbd18e8dbbf664 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee86af54407785463f0d8eb7a8f250bdd243351e5efff40cd4a55ed3b14247fc +size 57412 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e285dd41b5d4b56ed4dde6930bd0dbd172e9413 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b4686081ab3bdbca03edda71d76bfc5e77aea8ec975e31cd5493ad602371aae +size 22237 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..485adfbfcc96b8c41157ea13af78e015e18c70c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5751ed56f38c67a0f18cfa6466f928bcdfe2ef16e7f690629870f305e5ec04 +size 169036 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f62da4880da9a076c98bac614fec88b479cbe6f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ca0d8eb433b56e217ab4733960a68cd67a85f45cb74b6bb74ba849b788c9f1 +size 90106 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80b80facc96bfee11b4193d450aaa5be5645ddaa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f455840d6f191262d01c104aa20eea112aa732a380325ccfbf43de4aa98859 +size 153846 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..184aef1aee37c97d9cb2f439b0bfd03ac5291ce1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624bd28110dd4366c52bf4151c277e7afac14778d877f35f801a8fb51bf3efa5 +size 79388 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..333a77ada1dbcf0df71521e68580e36565cb73e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:184a55ea69016d22a0ac38794001ad88e0989167645a08dcf01d408a44b22863 +size 74328 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ec421aa649a8ffc91965b79cfff648cbd55ebfa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e06b14431d66b54f6e3515ffa45369f5760355e3f03aaa25c2f2aa80da4f94 +size 82482 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e176118a5a3958b84df40af671b92fb8bf1e3d95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85def32b68367a2b54aa4163e8996f866c13a76f77a6c7d9c8f68d282a992cfd +size 89814 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55b38fa5f866cfcf33538a004a90b3d1fed87b3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:880086789a96a8de378a6da5267a1c884c91e7ba158d575d6ef4f0462bb498be +size 1003937 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7beb3ab1f83f38d2b4b2fc1cea19c15b9684a2c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd8154eddebb15d134ee71fc034774732a1bf01e9bf3bd85702e92c8e8ab5ab +size 132887 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f7f62eeb803267ac5ec796e72a8ffca6f256979 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4167e4a544171e802844949fde4429fedcc95480db28c49b246901dad6f5bdc5 +size 182943 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f8b8fa0d2adc48598697b130060923c5608a3a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cef335793f72d85fbbc252bf517d8836136d8012eadbfa3ccc193ccf6d92cd +size 27538 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89d2584da22574bfccc06516f5224cb92cd70e9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf49f4d05c8be9665e9f64df9e0306543c325b66134d6e0145b1dbddef4d2af9 +size 120391 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0b6f208deb4dcf21dcfcd128ea564b865f96e95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac5118288e6130bc3066149a85eff942d7581cc203bc547a70817d84753878c +size 57985 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b8d42b39a7e6bcbd6cfff02c599adf49ed497ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f503eea43328ee63d34848a2d77e736eb3e6c033eaf0b621c5315b8fbc07a3b +size 25456 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44d7fbac3f72b115d1e41c5eae9ce27d5deb8fae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129977e5441e6fbd9fca40cbd0a42ce9a51399bd52a9dbd65a0e74deb4984564 +size 39443 diff --git a/eval-results/mmlu/0/ckpt_207/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_207/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0313e180eb01aad71da828335538433a378b696c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbcef35c39b5ae012d49a58c7fee069eada6adc12dd0c8cd1be9d525f9ccb10 +size 32911 diff --git a/eval-results/mmlu/0/ckpt_207/results.json.tar.gz b/eval-results/mmlu/0/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8df8c8f8f86f2a2239546a2253169c650c4e497 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2391e15679f655c2368eacbf1b13477a1c16361d79bc5732e6d40d54d69aef5e +size 7625 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98ae225b21e2fa86f1a1deacacc9280b5939758f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35cc2d243324e29e9a1165f998e7922967322b2e50329e5bac2ae700b34069e8 +size 16999 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88cb107525cbecab928bd4c7ec7efe84210a4cba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:513ae41e01af6096f7964487535ab74471e27771bc7c57b1c08dabedcd04447b +size 29815 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62f545017da6319400d77199618dfa6f0709bc89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62eece02065ac270555bd57361f117da3c8e19d515a4847f8a191f4623d28d24 +size 39823 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e38a490ccfc64d9b3cc7172cef8f0440ba63f809 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b7131c7cb42503cef466e65484d0d3d19ed266527a205f74044ebcd617c54c5 +size 26741 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fba1bfacff9b1c17aca583aa1207ed7943eb955d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e47739372d756a2674b76aca17aeba2eb834ef6ae795adf59d28dbe3194e5d +size 61196 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc1f9b63f4b10a6afccbba99f46fb8a283cfa968 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f965d426339b9b6654930a891bc8afb9b06af306b4c64d3cee286d4c8782a984 +size 40392 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d28443fbc44591043b4967d1b5f80ce02af9b172 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:487c34ffffc1c9587f25516a30a324445cd75ae950a06d33946556fc5de5c61b +size 23755 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99c931bda1b08d8baaefad81b49879c489e1e75e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7fbc8c55c69472e1dfc3a7e14bc8496858a725121f7e51660487eacba7217e4 +size 31099 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..514ea38673c0ee0ed97feebbe915be8b58789a09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:765fa1ae614051c5fe0be10e15dca6fc4082c499a9b8c1fd86634db4822f7f36 +size 22913 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e262f35b97a29002a4b69672813fec07db05971 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7313e1eceb20b2093a6e2da9e2b5c51fa799787eeb36af6bf07e7d7c86c08234 +size 60864 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e822c3dde6ba66fa03613c7f2e460a9e93828fd1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42cc535a5f38c69f3b4f8c4e6bb596a6395de3d9fbda995d4d0a31d3dd2c0868 +size 25723 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81c352cc62565f57c697b2af573f76aefdf68bb5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5deff22fb594c55b521bd994f312c6a4bc7e756ee4f2dcaaaefb36e531c984ce +size 25762 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a24a61c72ab6f19366e78c4ebf18d17907bf1ece --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f601e999f9678f4fcaf909cb8d5e5715c53b00e1b485c276f0a45de82bd1a5a6 +size 46472 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fec75912d25b669c04594a775769bf95f136da2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b2f5814f828e9488b2ad0c271be8dff4d78459e39079a0620a7042df32927d8 +size 31480 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec1757c0e4a81a76d6c229c2e268ec12a44c0c00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004e982fff089c46c90474d5678f2013dfd027e9a72f93ea3d222ece19bac830 +size 28725 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ba91726a98c3ec31c4acf43cd44780453538e1b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:265f56b7cc3cb72488750771bd54d761c6d14581ab25ff119e1d7c5cc8b5858a +size 74581 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a86ca14c6070ebff8c10f5e504ffd52bfe438f3c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323d067519fc3f8467a1df61692d352d363550f0c6a33a65fb188541d878518e +size 30077 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f92b0684088a1a5e81f2b3cdf5358feb108489c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4509b460130e5a39c9eb0248da3ee416f975042ebb9926d0b003154ab80c9ab +size 19096 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2b2cdbbce58a8358d70d84cbfde4af552696c6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01edbdb9935088cf587892a0ccd2aa5dba9f6b31475f55e65c391a4c8b1efa8e +size 87904 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9faa31e6696d7ad1c1da6842deb65194e6ec0dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a0cf63a999eb27268f333836f1b84b6eb4fb188b7b148b9da875989846365a +size 49960 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40e8e0fd3e8e9c785163bef08e91a9c8316d741f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b994e914caa7cf22ea4ac93fe994e26c1c7660771d8ff7328167dff08fa316b +size 31569 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edafddefcdb396730206d5076212598f33d8394b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2996d4e767384ae4deb2cc1d371fd981d388032f9fbb94e4fe2216d5cc0ee11a +size 145408 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..820a4e3cf293fe748d6c6063c1189b75a330dfab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fde6e0c1efab8c64c3cba53d14c9d71d86f3ead80bf8222f8df7df158c242bc +size 44569 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1628b62cdb7ba1132c57310b58f89a2147d8d7d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30ddef5346495b51c5087af0abb360933cb49096a9d37d04258e11c74b0cf0a1 +size 54373 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff4b6dad8395e83462c920b37fcec8045e05fa87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1951e5060a99d828124e04b22a51af86e97d6baf3a5430491bd97f5e894cf73 +size 92661 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5efbb8832f0a9cb5d5e5cc5e02ea690be52bee8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ed1fb5538a816c6392fcc3f07e7091708f9de3356399de5c4812846e482da0 +size 57023 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..226fb55bb899712cbf0ae7800f5ef0d09a070212 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be72a95b6d15598e86e0e9e7df0513c39ae803ff16dcc8b9fbb4ce16e1885262 +size 58429 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5cfdc43a806933119b9905c6a9c7341abab99ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d20f5c0587203902e6c2efd7b1828e879595071689930fdebde9cfbbc1be0d61 +size 42842 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22143ed9d66e74bc695b50a403b013de2567d7f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8af5216243d93309e9dc52a03dbc4402631de34818634d16fdf0cc8bf3ead70 +size 143831 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72b8c149f6d7d985808031821ef00b3fd0c1d604 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee9a63fa75d6a71472661eaf984be4ef74a8bf269f80e81dc1470fef9f90af9 +size 71130 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..920501b6c1875c22dcc23ccfd32ea72ae1534eff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4a4d42354f45e1d8e917da876843d4d964088100931bfb84e849e4166335fc +size 161960 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce47dc312d7cba8e8ba41214a17973217bed351d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:887003a1ef1d6c86ab7f202c18a5c4693d49bd75229b2c94b3451dde01c5d0c4 +size 209412 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6eee60b48b2718235a60b58becdf5421ed389332 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714032b4238c89c12fe19d9e67057e6eb32f2a2e803e17333e134c6a64340863 +size 49881 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..674e6cca1531ca136c4053c6ec45b1bbec443a7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb72fa37fa66f44a87bb849a0d4675fe054b71a086c5f469cc9b4be20fba410 +size 31510 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ac94ff1b294fc52f45662989f77b345c138755a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c850643eb24b676383e7889e6f68c174b8f773055f7fb801fcecbc07a0a8a6 +size 36074 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..799ab533bf44687c5b4073322d8aef5e3aab34c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84fbdaa79272b6d35230b045b81a44b4a3e785afd87996496cd24d156dd6019f +size 29450 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3941ae148176f97408673f7c6aea2bb83b520236 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd7324675d2c0cdbcba00f3adbf2954343c20116fff6aec326f48080c8e4a3a +size 40289 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb1c24a8b5ebdd5857c4c678d9e3c9abfb068179 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6045b5c923f7a23998e363b291ad20daa9d850121bb57f784be51fcaecc13de +size 27002 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c488aad2c60d0db35967b1bacd82d6882deabf5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fce551370226b91e36e183f042eefa197e6a57821773c7ae682ba78c95e83bb +size 21700 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65d6e00428ab1230cd42c4876027a09e05c76905 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a60bbc5a7f4c81cca82f1703128c7ffd5532b8ca397a1663ebe07f353bb06e +size 57385 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7efad6d3e5668cb90f763d811b8433ae78b0fd0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7c1a382f2db3886cde48baff0dd433b88dfa3150caf23354259aba9a06b3fe +size 22246 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55bca0f8285f7784a6366a1443ddcdf7095c70fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7860100e271168d7765200b8390e2f08207452114ec7129252eaa9be0f4ed6ce +size 169110 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24f330182fc42d3a3d9efd6dde21163c95b4f067 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d7614d3dd8d13f8d73761bcbff8b57f8521ddfd60e200e521b9e333a4637a76 +size 90144 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0111b512b7fb2a138793eff399a34c844541e60c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778fb6926dc3e9f557c7a924d9ef6ea4e785e6cb0555d01ae8f7aeb19597ac6b +size 153682 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05d0829451bb1b85ae1525f8662f220771659af6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cef8ad256a88a3b5ab46743ededee3452e89b78b86d14fde2e92586d25e25063 +size 79367 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d82eb7d3926a2b2298567c89b72e8f99a15a13f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85dc4e4c605281dfb421fb4ad4450d9e85467c2b4986500a6c38fd701c1a3b46 +size 74262 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef0ab6303603a388bd40767f0c933c3e0f35639d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5672f1ad874bf5afca2adc19d4821070b6477463e5308864e9fc58a85dda9a65 +size 82533 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a51687f5c130648b1601a5ca46121370864ac31 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fba5ab31fe0f331b23c7102204b74556b3a60b427613d0230416ea18748c4fd +size 89892 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3da5429a8bcb88c0723ba987fb96483f154c6040 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74593ce1eefc1b238b3d2542b7d2b81aac14f13fcad80c3de601e96e1ec8f072 +size 1004516 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fba16013432afb49993fb68a8ffd636f14893a12 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9fc3cd92b27ce1bb9ed35508422005d66192a07a7cc97946798b568aae6093f +size 132947 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..873f65b688dd331c152bdc9badd2d751132aeea5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5fb79dec2451dbfcb81c40d8c29181543c2f9896d23ad111ede8e7c216a1fa +size 183022 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9895599a0d72f8c97b8db98db5b0665142b651f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09cbd82e77de95c6055acd9b878c44b0f86d925bb8032572aa2f5ca565911f0 +size 27500 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b37f99a85cda41cb4ba08c9d3672253b1c2f3ba7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad67f704b0da8c52c21798117196cb02881cb25f9434969fa2ba9cfc704193c +size 120330 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fd1dda7d821a2c1c1870275be3f12d7e74205a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bdaa0e9419fb1eb449d1090d2b4c7d6ea1bdb0bfdeb0a397ab2a1e83e62706c +size 57967 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28166df3835e09f77aa1e2971e17b80ae8372bbb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7350b0d23a5108cf961960a91c279620ba2c0263f9ccb1ba6b1713f2a1b0f33d +size 25421 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..790e8b7293cd335a72892899fd7e07dc84ed0adf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0899a38d0ea8f4068fc32629fcf32652776507693a7736953f86875fe55126 +size 39417 diff --git a/eval-results/mmlu/0/ckpt_210/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_210/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3801c976023bd57dc423a76d5475d7feafaec2aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91e3d513834d7e0f37202be82cb2dc204a08dca640f246040a5a0761e157fb7 +size 32909 diff --git a/eval-results/mmlu/0/ckpt_210/results.json.tar.gz b/eval-results/mmlu/0/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3512418b152fdb100d9c961a2f0180a66bca86e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383a040ef994ca0f671b5b38c9c6dedfd51b67ac7a9c2de181f982b62b9fb712 +size 7610 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..267151440d39df67e246cb0a771be906f0498349 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f8994eabc52d095282dbdd15d0043bb1c2e5236d28bd3dad01549b8ee52733f +size 17010 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ef774c222c5b8aae9167efe7ad9864006171a46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d473382ef871bb6574f47056f3050c828b374bf8349059a06d752c9bf5b77db +size 29811 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70ce9fbb7a7e897629f51f400c48e14953483bd8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b335a2c6d72f799ed3af9e7a8caf57411df88ade1f323fd7dde0c27574f3aa +size 39791 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b8fb37c3a25fc9afa906f2b0dfad37aa3b6ac0e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80e936334ff84ff68419146208cc6cbcce841efac080ae31a987b11089aee2f +size 26751 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e20275d748478c609330603ed247e013fb786d07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8a143e348289cf91e30de92552888cc44693e89af9e97a8271472a88eaf1b83 +size 61202 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05398e611d22db2df20d336c7d48996767367266 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38fec06ab0148cb5cfab134c1f9121a408e860929050435fe1b5ace8de73e66b +size 40373 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63c89bfde418350beebcb609db3ef64cc0c7c5ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7794b2640511d8e2f4d078e36b2a5a4e5bc22a8293204030550ed1ad7f1c97cc +size 23770 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1a9476373f389c174278ddb9e4507a2f77bf69a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a409cbc97073d6419f96ae4eb2570ccd5ec112b71d910453b3fd788091bf29f +size 31096 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0586771d1551955d3d4557dcf8243f75d6d6cd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff7478ca80812f0569da813ede07d6c242184571861d29bebf2afc31c27a293 +size 22911 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d09c6e8f01b47f6ecd46f0f739c5ffec2bb7d3c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343291290cbd2d3a0b70dcc769035d0fdd52514fc0a7cfef9fa0f0afcfb2b104 +size 60858 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8be0b24784f7a3b2a1c94875e80092a90f40af2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2556f4e72adcfad684ded0fd2512962937bfc12c9d7dc815c0b04307b6785248 +size 25682 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c1377296aaa7c7b2cba82b5ac390efeb10e1f94 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406dd95f471473ef0131966c167fd5d95727df1c94f0936f80cb8f848c71f7f6 +size 25769 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7a182d728bb626ce99b7b948aec30aed5b6f4d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cd2b65fde21a87663ef23c967eb258c30c73fb5dabe4643c25cb0e34421f59f +size 46470 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f68a85aab62e1c815ae785c8e66da5586e631f90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f019093a39b1787d734a0f1fed8ff52eb6018107909fb890692b0984a83db49e +size 31448 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..252b48eae1ff5bd75ca5a8847765f602b49ee1ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd7d1c7535e6a99baa4f25bc45c06ce7c65924947a1c133b8dd7143c67474af +size 28760 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14b91d29f5d70ec25e1008732e3919f709df8ba2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4623afdb22bde4371d8fe4090c14dbe3857c50a0a10a6dd484c74abefa7e539 +size 74461 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..063a1158ebcfceb416bd50aaf31e67aeceb3304e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44121eff2703ab764d93084f3f1b0f08bc17d02167ede759d1f687cf97a1094b +size 30080 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..597945435268f2e7a9f65f53dd1a743539e72225 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fbbacfb4606b82ec251d070829b2bef239258f46c1d43d472a1e164d190e7c5 +size 19040 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89beef0b899a83ec1dfe3a87b9b70cc8097bb0af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f358e0c6b67a10768ef994d6fc87936c605eaf021d6f6982795f43f87d65777d +size 87956 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d93d398fa67d3952ca6a4b3fcfc3b1a4714a166 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a476fb17ec452a116682f595b26c66f8de35001ff6b817e76848595a9d121dd0 +size 49938 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c57915e59464c9eb0ae8c15b2981c04e84bb5131 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b469d96a9f7c87141a4ddbe93d975552ef884ca3cfd927f7f65979bfbc9084 +size 31564 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..124cb86087b757a976fb543fbb0bc1d4fe8bed67 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e692b495338b0af2d1cec87f03c4a3fe9a994fe155e4ed150f0b97562c17e2e7 +size 145341 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ac1a0d1bea8979f368e3eeecc5db9763eb6838d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6093e9a7673f0d31ee21b5b1ab2d66ea4fa96cce95bcb011e320fc4347a60eca +size 44549 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..307268e0b78077e09f8a220a64df3afa9bd07feb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699e6be4deb3be06fe8034bde5feed41c871067682d91a170c55972e70b0bf7f +size 54379 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0e58d5c4564d3aac4052360e35d2e7360797570 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a4d09276e5f62c90cce120da342bc166f1c9d48b91a21f18a9a98762ae2b9d +size 92696 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fbee57404aa81bb989929984fa85d3e741e2902 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b477818fd91e4646c4e9e24f8a12ab401dc730359c279f867c57d9d02b17f84 +size 57057 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ddcbd31a017e5181b2e7224265c518ed361da5f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afdfd93f2ff7afe42e2a1989d62d13e5cbcf3df26f4777f788e02813c51e26f5 +size 58444 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77d34face540da6c8480cf157fc0e54ff2a9f327 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7e97e8639ec8475842b1f15f014ca1f7ed8afbd65f276a9b2754c02dde74c24 +size 42763 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7577dc9a95c2006b4d56514f3857861cd9e4ac57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c49aad20d03cfe78e80938836d3754f0c78ac63ed704fb9b9a67b5e399e66d +size 143834 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e066a5ed32b4712d9ab3ce7850045da77c95a027 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce8e799f43319e2bf06645b34c7db1f042c6f8b5e87b1f71856de0b578d4b61 +size 71089 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0649a02e55c447c3965887e6c5510d7a1c41f56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e1ab6247f834a287784a3918d0e18f81ba57ca8e7ca4ba5ac41f0652a71501 +size 161925 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c28b0f8536a5517e1224259e409ed9c74a04ae5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668a1da0ebbebd19fa2c544cfc15570580d236e1858f6071cd1841b5acda55b0 +size 209344 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cd9245cb2876e9fec65aae81ad9d89356503cea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5170f1694ca540067f5f3817ab66414c34fb4ea18ae4b071944c1667286dfb8 +size 49925 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bf35a9989cd85fa0061fea9c2624aff626c5e5f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecde6840bb84254e65b6841a05f570f57c9357d0a305e8d96765f640f8c0d677 +size 31530 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8197540d432a1e988866bbc9c6c8e4eca6a35b38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768f2f0abd9d3a35b2377dc69385b04d9768dbb1dc8e6bbf2290b6e8ade4418b +size 36049 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e38914ca1d85ffc21f6db09aed589130b62fa4ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4dec0dfc383b650c7622fba3ec5253ce63e6cab45935b496f5f0e0dd7649117 +size 29476 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2e852ddc80ec93469308010f2862b550601d84d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1f7e1bf290c92e618257577f8977dab004c3120cbd3531079a45ec2969c205 +size 40316 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d23ae32714aa81908d2b1aa02169522085cfb220 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ebacc05179798a90d1d6ad214577ef17d178b11d094fdef0c777b4339a1da3 +size 26998 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99c3e5d5da7b5c2b881997fe76a1359a57cac4ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea1d8bde0c23f435e2952b0d97417d6c1d59b66a87e75bffbda7e9647eb3c95 +size 21679 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c9dda7a6fb49653b9fd0be36d2ae700fcbea9c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780d5c31ab647fc13da2a562fd0ae29f5a22ab594b5c33dbd01b3553d63c6e56 +size 57312 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c49d65dfb92b6a62374a7913f160435413ecd5b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1688ecf50e85b20d01b5541820c10003edf42b76f0de7a320802c49948737a0f +size 22248 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f3e60df725b9fae92b4dbc3eff48a186aabf2a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c174c790b696a494ab24c04db082a178fc7b66846d6fd4b9234b355d47d3540a +size 169057 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cbd144a257ce397d615f6bc51290a744a9734cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b098ff7448d973e93ccb9e3d154694fa8e107c7a977f18d6f042eee563f4a15f +size 90104 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cefd9f57aa78804287ae5bce8edff9de3eb614f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e648ff9007857b5e2133be347bcc39aed88afa3d8f6818fe61e88d271319b5 +size 153835 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..397065d9fb025c9164822c9e19baee67e22c83f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa41e2657c6ac7f473b14eef2dc5af5d1c7fd8209769e802377a7602aab2c0c8 +size 79388 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5425188b473a9495b260ba39734466362db2c5ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1daebd5e99bfc5d8a72d2c03c26792371ab8ef5cf436a987bb0ad105c5bdfd82 +size 74237 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bb017ad96df674b52a787e1a6091faa4f082dde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2914c1bc970f6ad2385ad7ac2dd3eb8e6b356449ee10c04b501d70000e76bf +size 82505 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9dc9a618e7ca22ed83e2da878b7e9989717c758 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf3af1fe831c98e75b47a484845d2b9b40199629f1feea8a2b7c1ce4cf1ff1a +size 89849 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..092b98e55019df75aae2500d55014dedd4d719b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5f821fb839984de57b9d32383eb215d31b4ea4840d0ddb17484aaf7c730002 +size 1004264 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b389dc4452233e328b44e8a93458b6d6455eed8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81f580a9676fd65d81f2af6347b74b4d91d63c08ebe4861b732556519ca267f0 +size 132996 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b56203632e311d3685f5fbab06acff93214fedea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bda723f1ca5c3919ac74e2a246a84c84ab29653c4a1ad34df64d93fd29551fb +size 182945 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf88ec192203652ea0c471918577ad2219ca3ecf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a3c4f00596065d741cf7fa697a2c7e239ce86b34fb2a670e911bd900c50eed +size 27516 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..496c12fd6fba749da888eb6f59e797e892a40a57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c99c58d8dee0eb4b0bfdb6fb110f7c3d5ce94d2890815ba7d59b59f541f55c +size 120354 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7bf9244d92738795e6eea0fdc8efaf74a916b32 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e616c95fad25431092241b14e57292433b9629314f9b7b4ec14dfae6e8733d +size 57982 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96b629e2b20a69ace981490dd8ff83e057bd25da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea80c6a1075b22ec4c240f1263242bc84f63d1eeac52096eb3e35337d54cd0d9 +size 25417 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff84aa21d15217a4c66fc946e40247365adc80c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a3d6ee351660f3e8840f51cd1fb29de60ae6fd73c8f2a98d6a7dfa16e882aa +size 39415 diff --git a/eval-results/mmlu/0/ckpt_213/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_213/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb30c63535a34df57f19511be83724f378b8cbd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bbbe71fc1337b09a4b2d2eca000393d6830afdb4fcfb5fd336bdccb168d0eed +size 32969 diff --git a/eval-results/mmlu/0/ckpt_213/results.json.tar.gz b/eval-results/mmlu/0/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f85eb1af2dcb358636b00de2c545ff0117645230 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f92f18d587511d5962757755c30c70874d028abb439684bfc6c2408c51f098 +size 7589 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cf883ce37f895d4dcaafa4652838e1d5c36ec34 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca99d4016c58790b4ebdbdc36e07139efdba02a0b0440a37281fdc9bec24d68a +size 17065 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f46fbd0d00db4f40323cea7f277a69d051b56f77 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe8be20b52c939731689b7690b128bdcda62b0c0515c6e3474c0efaad8fad71d +size 29800 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7cae242fa40d44630d912f164a3913deee03e15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0a9165977334ea9701b3fe84f64b9d4be8e9bd0b67f71012118516336f07145 +size 39768 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62ce938d50496f7315ffd3e2f9924c7e586b1ee5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9d9c357eb1e512f157888fe56898283ed0ad8a26e84d79a911b9ec96bcfcc7 +size 26752 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..616b95246fb8eb88c814d99310f420249392b399 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e077e44bce7e226eb65fa2819da5c02b542e598d1fadd4041d91345730be0ced +size 61187 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbfd5980d7c18ae0d3e130bcf0bc8040ba8fc7cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34fea76fafab45bf38d9595e7df7af93a59c726a010a24a0b1b1f67599f64a6e +size 40425 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0e6c24f6eb3b7dccf10e5cc927315f1d4af42de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd6dad138715c7afe4a6ff81e1164dcc9ac70d98ba49e2f9bcdd71192eed056 +size 23759 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea4ca7f09b27d10e2d00051571f5dfdbcf2c3263 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beee3b6f19aa8d77aac144d6cec808101cf8f781a9806dd9cc29dd150fdb9c2b +size 31069 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec4dd1c3883a2531359e95d97e22b705f4736994 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08cc8ddc4f4dbf9a2467e877ea98f66350086af20a933fdfde20f22a4c59b24 +size 22896 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..614d6b952e5fd9b71228b52318123b19c929404d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6bd1c95ad54e854bb376f11195a6f536e6ceb6db037ed46e6fa67e6a268a22 +size 60896 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e0137e9e9596eba35c759a3e990f171ecfc9ac0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385540988d7295392c2b565a3e37681c9824cad45ef03369ee64a669e8241a5e +size 25689 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c30d38c511acae2468855d40515626ff1a657da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a43c3dfc03194fc9aff0d7f59d3d16ea92fdebc44bb8f0a40e47a091dd87ced0 +size 25747 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86b3aa702882eebca2cb11bf8e9422424454ebb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:316779277e691b5a9b22a89c502687f5bd822fdce11bc856ca50339d79fdc018 +size 46458 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdf416f8f557951705a9898c35951d5184e8ed02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e7533dbbb5bae016325eda38f590a49c3e35e136a4d8144bb643ee50d271ab1 +size 31475 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..faf2461fc95afb9ade0b2e248428e5ba75dd7575 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f631bd8b0c82bcafa4178cfd7faed2779fc065847aef79be87c8f3082acb73bf +size 28780 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c83c35334422c69f2ec4ad9f9df1481101397a83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52714f1878d868eb1c6ad05f582fde480fd6d62d27de8e8fe9a874e148317b82 +size 74559 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3945773dc2df9c36af6193f759c0fdb90d72bafe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f137584e0bdf727c2dc96aff23f0ab32398265ce578128c0fe05915617177ee +size 30065 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d58051f02228b5e6fb122ebaa08dfcaf55837e54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9527887094300685f167f64b36bd3ce1c1b2bf4f65a144e26bf655e242e85885 +size 19067 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..128db3bf8a806a1e5982d1c4fa02b439880332d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eea4aa03d726cfee76887c0ff1e24c8ee5ab491d2df4604443a367ee8560e73 +size 87925 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e39048cbc8fca63a847055c648a7c3c9170bbb88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f8da8d4bbb0194cf500c93488594ec6e2333beee4a95d2a955f945f56012a21 +size 49939 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08bac3f4c3726d513195488c555eafb9d26a3160 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2fe7a371871a405a81433683447013e8249d357604a3abe0f259f2394a7024 +size 31617 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..448eab7629d7dfb42ea27d08d49621e1d070749e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b1113fce8b159f5852080fac653318e11edc05164c7040ecc81d6b884999251 +size 145464 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e169fd7aae40e8ff812e81d4642a24e47012b959 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf798e106fb1b78920e9dd079d7c68fc16592fa8e913228400c16f3aa40f500b +size 44557 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9627d00686043b3e5a194ca3c2681343310b48d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e889f44b0a9365c7d9e84a457220ead37f5b93f82c657632dc1e76e38a9250 +size 54363 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11f9aaf57377a3bb30c69d57bd625556de7b4a00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c06a5d8d4f9b237321a92cde753933300cd25ed44d0077ebf89e4eb4b1f5ba5 +size 92685 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3867da2f0c22012200c28f916b6705069291dc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3865d089d38cb40df3677ab92bca109936f2b7fbdc2328b1b95b04017067b8aa +size 56991 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03eda2663f014fb4d555f0c1e45c3c5734bb1480 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c84bf883d0901e3c0f01f98b4473765766a58c1a314391a3e95242cf8c61483 +size 58451 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a01ace7b87799a626708294b739acc46db4b403 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:035a8d6eabd9e7ff49d4d24c4a672edea7ec945b2d1dc927be3c63487bd799d2 +size 42810 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f02fc6cae2e795b98209516b39f0577a00a45695 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d735e7c4f76f83809146018533523035820ef19065b547a5dcb429cf778ed4c +size 143842 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46020031f677c451e0bfde90921a97566ed526f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c270d7db38f6ca4e55944bd0f0c1bca10f36130f59cb96db4e8f6eadf52034 +size 71124 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0abe63e267617a66f6d938472366d0646ac44020 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb35209a45a0b75875ccf8109c32d2f7959a8d4adb674654ff7530082ca9727d +size 161964 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aebd57853d0f10b8ab3e6253d1fca3bb6b3f6a6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d78cb3f5277b56f6a51bbb7aac35734126f9b0768733779b9460cc1a4c21b5 +size 209585 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c838fbf4b82d96b1728acd4c048976fb4752f082 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce51db5771c78b93be48d73128b9f324f750ac17792dbf4ec3cdedb54a21c594 +size 49919 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0268680e7eac59617eabf3701353d591566c573 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7357c338538f481540aaeee55065faffb2844e7bbcec9f0298af96f198c49e18 +size 31527 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3853da9ad7f39eafbaa7ee476ba1fbbc3074b3af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457b4104e099d8a702d6ace1c2bdd65a2e1ecdfbd2b84e2acb7f4737fadefc10 +size 36059 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2a55c66b7215db908a65f5f1a9e9872738248fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174578fa1205975694754294396b578a7ad983da72d26a1dbab0e04a2e09de8b +size 29458 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79340d7d92f552ca91d4505e07e0ae2d6815bd57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56627bb26f1cfeaf2e51747cb8707d53bd0e827af760b4b5090d4de754977a10 +size 40319 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1748f78c8e914cd50c7f9f7e29e96abe560a8aae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17570eba7b39cdc8b2d1f3aca82751554677d049c3d095b5b341a17feeab95ae +size 27002 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08c22747ab7e5d18debf4bbf835c16a05244be09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:566530ed88a462a5d16c53d47c1385bf917c0f139cb99bb7eabdbe345019a83b +size 21688 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61c774511a5688416b52b9c1ca91462ef82a7de4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d13e77631cf90e455f72fee2c11e4c332b96934a9acf0f0bb02c8ae9d055a235 +size 57429 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4170d56b7c87ed574320023f1c578e5ccf1e2ff1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd8d3764eeae5bd59e36c1ca2b0839621b71e9c4aafd1132559b2859020b80c +size 22239 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..844af3a7a8be1b50d6223dd6f4fe0aa8ca0b1c98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bde731f43db50108acb03bc385a8b1c5d3a2c7b5bf529325b59157abef8c92d +size 169035 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ecc9b5b77783531e911b33fba3dbcbe28e37838 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c2fff14ee3baae1ae12b13d5f6942c23db2ec819a2e0e5b69aca26319c4ee3e +size 90124 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2fbb91660c6c7f1ed542abadde266b290a290c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c48a1e6989f6594f13ed4bfeb81a37b87206f0356b303748590a45d69877ce +size 153694 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdf88851c47dd357fe19273e70b17323ef98ac7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c090dbc55a95d899286147e99511c09df8fcf14b65d2a86dac745a39deec5eab +size 79356 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfc33f0a5b935537f2986cefc924381c3ab3a39f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7d688e8fbbaaac159b8b5043b27fd81f73606b816fd35ec9ec6189e163d358b +size 74298 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8017c38dba8a41a62254d9f8df479691b33b48cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6959a9f206c32214cd11b70d0e01a8d1cb435145f4cfd442cf4947cd51098290 +size 82532 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1352e24a4340b8cf0846ae0f52ac58c9754398a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d5dc51e2b91848ee3fcea9425060ef689a25689e46147fde5e59ba4fcb42b3 +size 89860 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96bbe07fb9108268619660cdd74193537d93362e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2466f67e6345323faa9f672d01a6d3ad732b4fb6be6bf384196a75f8cede87e +size 1004372 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..021856ee43e1c7d48eb339e87bf7379398525172 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7144af3e2924b4e5733861a33f67177eeeb4aa3f2ed20352821b7a1c015a231 +size 132862 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fca14f66ecf1159acfda009487b0b7186106185f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e80cafd71c36da1b257792d410551d03adcc0746dae2f914ff796a1fdf6e17f +size 182933 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e2183aaf3740194fa77c1bd67b9cebda9742fc8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceeb47efb60426c9e392fb137e5e13c0b33ded01f9dd2d639776c639095ac4be +size 27521 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3bbea6544c663fe4fbdba1ee23e256dab0563a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7624b92e087a0d1c6828ca6d5c1d355be38738f79a71590260daabb19105b51 +size 120392 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59373f21763c3a8175b50883f5c73b0bf223ff35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df1a1d308448bd09d135aa7387d454523654a8ac292ff9f50de6fa0967a086f +size 58002 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddfd3e79aa1c5171adc42bab800d828dd58b604d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0ad8ae580eec50ec09745ba17a0164ab813c43cecfe2b475d71c0631f765f8 +size 25445 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbdecbe016bd28d4bb9b9fff0c7095344c01af6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d5eea53ad329e73be7a51eaa0ea83926006fb597a0148f1c77c341dd0592227 +size 39428 diff --git a/eval-results/mmlu/0/ckpt_216/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_216/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2256556d1faaf5bbfdbb4af2ff68287b39ba9b5b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7781ee32cc489bd1ee75ce0ce9c0f3175cce48b358bbe2f9f84619d34545ef32 +size 32947 diff --git a/eval-results/mmlu/0/ckpt_216/results.json.tar.gz b/eval-results/mmlu/0/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98f23e225a3fdd366166e1791427b3b8dfc09e78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b62e653f4b3de1a626e142dd2a60daf035f6a3fab90a1de06a4a2951d8af73 +size 7635 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94b7e72ff3ac7c8206c40d0aeb002f147cb0d448 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1735ccb1e99aec42536be944778d93635a234049f8baa49b5aea76654673ddb +size 17019 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76b020018cd1fd6da2c818fb9d822ca1a9bb79d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c83aef7baeae0b12cf2c347eca19a8adedeef5497048f7006b2a8989c44a8f8f +size 29821 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccf065b485b7ea6be5bbbf909a83b6f0c0801e2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37e60f3ae0dd4b9723d164ca7eea5a7103d32ecd088ddc074135d7c6f9d5f3b0 +size 39825 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c67ed0f82b27023650dc2030b286c8bb97071227 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ebfd00a850b8cb6f6d035f98e28ea35653514e0944104b36ad80d6bdb58dc77 +size 26769 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..720939dd3b35878d068ce6fcd38f7184364ecbb7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7324e40df09a0c4f3b1f4fce5bff05d31856a34278c4f7f65a4b5d3b9be0119 +size 61220 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c53c7ea8d3f71efd88b0d4901cb7d65080b034c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5be55e6d36fbc2a1a210c5a0c0d3c1c1625d3ae53350830e23efcac7128632a +size 40352 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07daf41f561a7100096990ffc3992fdef81e9567 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:979f3e2e0d58581be042a6053ddba22ff0969a98660f057c78119cf4580e9d30 +size 23783 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06d7a6f54491ead0fa11407c65eb67c153c27b2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c45ab14a6ab04de641cbf4da643322fb1a83730c0ae9dd0fb9ceb8a70cea4ee +size 31090 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c8efb2ebebdbde948586f723dba12e1ce317626 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f5f19dff78202a2480a3b3766c00300b78b7f3b87f5a237b648254cd4b5074 +size 22924 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56372abb0c978ac420f0d9b49ec277317f090c62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c453a10cd7cb26fc11d7fbd202e5cca5c5379d8ad31dec25197d5f16f0dee35f +size 60909 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaaac6190429fde0c5efd5edd53f3c1b132ee00f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2982b05333006f27a389271a241f1213486de999901d0f32e2415e023d6434dd +size 25719 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50933a08dda47892d9f399f5b0cb26a8bc00bcc1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079ff90120d59b9c325f3b6c2a9fbd1990a76d420adf7848c6405b23e3b5e86b +size 25780 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f07101924c3dc28c1c65663a228a73d839ea45a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4bdc4b9a2881bb72b029f378c89399848532c55d62393b3fe61937c24e9b7d9 +size 46445 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..add1548bc59602f1cc9f0c3392bbdf97e2cbd963 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e80401605637e66a05ec2b225efef7f300fd5a6fcb55ed117d337073447663a +size 31481 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c6721e24504bb9d4c6cc0e9bd6acbb8fad50182 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e58f018205cbc9e06fe8ee3373f4f2dd301767bbd146566c2d4d1e0e5ab64ab7 +size 28773 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0dbf4c52f76b22c8d52275546ea36adf44dc66a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0ebbfe00c1cc5fee212d38038658e9f7ae4243a41223509cf093537c26bef77 +size 74558 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c7f2d4f8b7ff15af2b9ff52eef1d2dc5ecb70a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df3bd04d54325544ca9ecf1f7da0bdc31b6ef8f7b969138f1ccce7c74d97f29 +size 30030 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5ff5a94e8a7f43b3f90b90131c579de625694f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317c928bf2b36da22f55ecd3ec6e202fa7b0cb6880d88b686f80bae1c5c460d5 +size 19112 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d90801c6d6c3e65b6a988effad6f41f787f1e64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f6ea28dde1f41044bba9f5d68bcb0186b085e7c187e939f0ea2e2dc1597c18 +size 87888 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47a4e976e0760d67829e04959d914498004c2141 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ddfab268f0f753a0f40ace5950387a2e10a37dc8592934d94f5777a7c69d44 +size 49993 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f507d4aec62a393a3559e79769266d807a06f3ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031051973efcfb21a78a04e871e10933a6fa5445bf900e0bb571e35761f7724c +size 31599 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0421f8c550c67b1d6891455d234c30f9fd08a634 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d7f44440b950701767e05fa7f79bc1b9f000e5d5888c10207b7c0cff5bb218 +size 145361 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..218e771b65ff9c7daefc2cb89e3c2f601cd1cdcc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be743b605e496018729e666534241a2101642c293e61497ac5efe941d97ff7f +size 44586 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..872f31a6f38b3abd8bb6e67b3cdaefb53a3747c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5df6dd5a155e7a8eed93330cbc231d7bcf9befef02ab9cd8cc0475c5ce42d1 +size 54401 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29a617f096969be9adf9067b4b462273c44ad3ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:019ab7b43e9cc026500a5e0f05a762ce03db68473409bca61d3ced3da3a90a6c +size 92684 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af3d8f60b6581cba93bd1030722618143fd416ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30e4e369946aebb3cf6e58083b3fa20685490b3ec023ad6e1905a09eb5081fa +size 57021 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..067713d906d4de73160ad47ddba378e1914a7a6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:143889506adc5bad6401351794bfe5a63cb20cf36343ad993b0efe897d55fa32 +size 58404 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f2873b3b3eecd1b1da221820e8f48d86f3cd07b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c6c63eedf83c3d9ee056e604cb9c0dc1f05cfa9087514855f1fa7852bae72b7 +size 42819 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..178686d42041ef7aae8ec3fabe09e21730b9255d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32500879cd733421e3149e4671e8bc7d737091b72243af54d3472959c5a22f66 +size 143867 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2652cd1fc313b67334f892db7b427341c9612824 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0a96a21f4d40602cd8ce3ae20d6247bbfd130ce6b12c378847e52bd74505669 +size 71179 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d78c1e887cb8e70184a686e3ae5147bd6e36575f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e978a427b496f3c3e4942e81fd2bd20e640254d559c9b694eef4168c1bc3a0b +size 162017 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a4cbf40dd9b58d9f36d1e8610b4264d3414d1b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1668273b2144be685d95d9577fe4f8db7b770dd8a11be8182afc1fe833d12a4e +size 209437 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfcff06a4ad53c525fbfbb8dabe063e9d2282bdf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833bb3b48086f633fbc5467c4f19247d9cb5ebc0ac86fc65226db153b159afb9 +size 49813 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4072c742de2abe2b039c0f1546423744b130d881 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad5c043dca36fcee6242f19fc494e1f737ee95198ed449a2c0d1348907090d29 +size 31517 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7601d3ba4b6b527b5efffdd7eed74cbb3846071f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5eb84a9723ffd80735eab67d7a0f9ce30fabfcba68ba03291c5de5f729eefd +size 36079 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4468a84cf75267aecf4a01b643e4884571551bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad52e1cfc4fa6a4af3492be601792d4736fc0d8822ebe97de056ef6d315da998 +size 29480 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1417fdd5eae0e329028cb1ab36a26e1684eb0581 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8fec3caa24e6164dba924c0121bf57e11a727b1c1315188d327b4512b3aac60 +size 40320 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f01a6f1951121029cc6af52d35e6df321d16fae7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e38fad7d1f605f99f6ec5a2471d824fd2b40c213388ff2487378f27aeac636c8 +size 27008 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9263a85ba539577c5ce3cdf5aea8a9275f48f2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31247f638bd5227cc1fa9c0ee411a18adc7f1a6037f4c2f56b6869579830e98 +size 21669 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..045a0873425af63a63cb2088044d6fc0e096428e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28e388fe1fb37158dbd25bb8acfe6cbccb3f16fba16dfd8bb7462fad069073e +size 57406 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0db9a21e02ff5ba37f87e099ce5b3a5a26f11b46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bee48a02cad1cef8802a1a999cc0718507ccdd02a6b6643e479386b78fcae005 +size 22282 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18938124770d66612fb3628cd0b1216ff2f3407b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1109179a22ee54a5b8c6b846b6306599d2559f94988755a82c55450a82df645 +size 169094 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f873c395d13fc6063018294069d900b10e476911 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b3abb2f3c5a4068abe44daaaadb32f4fcccfb4a5f7ced801184aefa595ec1a3 +size 90242 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..814ee955b6f17c374ef5b28430c885e98525eaab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8add24608f9b8eee683952fc34202e824209f0324a947779f3f117885cc86381 +size 153805 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41291951c0e92e262c7a14fbdf8a0b99f531e0ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc2d88f93da0a0b749615fb4fbb07f176f0c87618021afb3e580cbffd925c2c +size 79364 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c86c13bb27ecc1ce60f074810db1c0259973abeb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c59e1baf39962bf54fbdfa31ab74b079f209240bf891f57d1cc4eafa433cbb7 +size 74308 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63a5fa3146bfd93e3769c4554b3b3f83ff054334 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4150ee67f4c16794c0ea57aa4354ff03650c234bdd5cef0dc5680bd438b73b +size 82571 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04832d26651a6ff13f60626741a1435cdc52acd5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:affd69af71cf5cc98a6503db147fba32fc7085d1e9c09d62de32f7030de68ed4 +size 89924 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e421217b453ef88f643e13db64fc1f49aa488ceb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896d9336c8cd8390aaac06d2d56f5eaadec35534db961542bb751ac655298c0c +size 1004742 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..956df9eaa903b15406e230ef894c47f846eec239 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98e2f7dd90b3e0f9e4384d9b9154abaa4a5bbfaf365fa51b9b1616580b1e8d4d +size 132923 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08e4da9bac09d446e7e7282d8884664f11a51fba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf9a63ff55b64edd23322dbdfd3e362bcc6dd59374de3bc8b6f73f401004fc7 +size 182954 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f78e657dce83bdf9feac415cfe80346f34581108 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a392606ed1634b533f49acf3ccb7508e502c25e3a6b9edd2ef9f570e72d8c176 +size 27510 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..117681c940fbf4750b135c5970d85bb91ae65a6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7ae2127a5a80cb61a2ee4d12d8d57f98b97d41c01f77ba048b00c62aa4b3fb +size 120377 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3278f0811b1cd780f0d7f5c811dfaa85beb9053 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc824f80d9f0baea26827fb5519cb722b6db376765fc769654def2a6bc9f6cfc +size 57967 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc6e3810d7f140e46e20e3bc8c785b51067371a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482a6f06f1ef4f4c009f95f86dee5278080caf78052cec511cfc51c828bce1d2 +size 25429 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..588ccc7d966c5a49ae9efefbb59ded9f3ea28297 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a9dee12dfb7564ac3e897b44d3a84acd62a41f6c04ae5a28a5a3b6f87dd118 +size 39454 diff --git a/eval-results/mmlu/0/ckpt_219/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_219/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f50a4282f5972dc473bb2636e1e2cfe5b834bdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c75d11ecb0c7f8f395d48aef9098a7f5afd346d906e7740de83fe2e4943fad +size 32927 diff --git a/eval-results/mmlu/0/ckpt_219/results.json.tar.gz b/eval-results/mmlu/0/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3bbb7dc57b59f27cc516ca840970b5ac419732c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61899fd6e25567b1fc0c8ffee7e9a716f4a8e2ac5f49311bcd828b17ae2ade6f +size 7603 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d4f72220d40f46716f2fc0ba05f15605fe25726 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5362c0b5254fc814e67c8d289365afd09f838d77a27db619d4d4519e0fce842f +size 17037 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d19a7f054cb9566ac6f09228d9d073c07820ced --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5e811873fa3d3aafa94450a70611d702e3be92f3c00f244d4b230d0c6cb77b +size 29789 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81e7bfa7935ee68a0f57b977907e0be40569c093 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cac61efcef5339ed0f82b8dcfaf4410699d080993becff29124b14ee08f62b5 +size 39800 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d47a00c00b28238bac3bbb17a21bfcfb4cf9b922 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efe0685da68b9cfa651ed7650b457291f0840abd2e2028473e9542e012720cac +size 26777 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1199a0754a5c36610f10f19e1ab223b9c845b35c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50181076b3a2434c9a33568616f378eea59afd94ea2b26f23a6c586debcbdd49 +size 61196 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83f4ce97fc6dddb3c22f51545511852aae31c131 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506c1fb0c5442e38712172b14e264ae67ea490c14f8bb31118b442eb4c54af0f +size 40352 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..813ceb777752f120935e1c3a1c20a8736e07958c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39c922a9cf03cc8c91b51ca26b881d7ac997eddf0082dc2f89af6d61286acf7 +size 23725 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a20373381c4080660d548421dfcf72161844e62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f106c49348a5bdb30afe43282ef99be383098e0d9e7beef1ef8a84617842200e +size 31104 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa947b9a01546a6344489f3587252b7a019042e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a01ec99eede0c5787cbb838818eb1041d9ff0957e7323fb37b30fd8b14502cc5 +size 22912 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..737a51beefe8ec71fdc39700bf67ebf2e02e6244 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb2a7dadc5bcc1532d07e85d5eba3b2a5756e5b20f3ddf60123f4d9c8f3d2ecb +size 60896 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e94aba62aec8f24c9fa116e6c6d0dfcddd7975cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7850ffe0d4867b0f825fa622c9792f0de10fe3d80f2049912869227ee2ce7bcd +size 25701 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4310c29a42f313d6335ec830c422244080ea5ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72be5f4f74758f55e2732df89d11681e7c88e5404b4e804ec932d1e51faf6e4f +size 25774 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a71aec0bb1d9441c676efd98346b69e76daaa79c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e3034a571f1d0d25b88b145f9aec23380ecc91047c6bab05aeb20584c93e04 +size 46488 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a622016aecdeb0fb424b34f98df1e4254a7113c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9b53e31b6daa3167251231438e45c5a5b251e241fb6b234e39cab092651a8b +size 31441 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62720750bcc2e33b703e4a957508dd6146652411 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d4b983c9c051470263b70903df889b3098116ce62adac5ba4779c4ea4d2f7e +size 28762 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61c4d7d8fbebe222ce7c6a55acecf5436a770774 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41299aed2295f3e2ab9f5131f7ab38ffe173969aa9b57b69a3e03339b705cfd +size 74506 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74f6fcf00ac04a267b71fbc2a4d39786e00053a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73f82cdfbbbca4a854051461c5d537b87bee7c1b5484838b49b5c5e09195561a +size 30056 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11ae98eca6c9039ff3c66ce6594e80443f0bb788 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874f0c1c4929ade5e76e4c0adceb41212c95f14ffaff90a6f8592b477083f61e +size 19142 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..870b87c2a0956de13a14212fac6ab79e205e114a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20647c22bda5ab7f8ea999849a294dba2e903da7612da7d59a2c303f4b222e09 +size 87936 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbd318dcddf4d7f64153789475d7ed451e06d33c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9dfb35b01bf28757b6d60e42d5257eb09c1fdd583a1d0d39394b879581f22f6 +size 49961 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f14de8f0958db7c8d1b8b059d1d79c11e6ac325 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4174857cfb2df5bd94691a915c8c3ed330b4773236b1cbc8029d5d758b6339f +size 31581 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70a264c9bad27261b7f46446df0b6e48a9ad348c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41f476e0d6b7ac53ceb36763c3d991bc95bd604452f6fb07a538d7e4288eeb50 +size 145368 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec0ff3eb8014f306eb928b889d4f983a04196131 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b9ade0c5cbe13f238c5b32e9b20dc4e79260935caa6a2861e5d8a309b54996 +size 44532 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..552fa52e651647e4d4c95a0dd9b356ee6a702e2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9757015a6bf6a1bd58dcd1ef93ac59b7c8aa55c454252ca6c1873b59a268409 +size 54365 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..478310527adb385ae0f41168417ef4c1c3e94965 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68e16bce51d2c982b5cee1c5185e1466a7ab2d6a6e4ce1b692d503a56f7e4aa +size 92576 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af5fa93d8def3f96cf73a8310a7a6da581b73ff7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b89b44c94eccd688ef1151a2df91e3a27355b53dc06b86f00108e96450760cc +size 57088 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..319a9fd2e42c94382d0977d04cd9cebbb3520454 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ce37be193524cfd01ae3dc0b50ac66f47a616201d8c07dbf2db6e83d6deec26 +size 58443 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a54420226d4f706c8867ef6991863caa142a24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93e0b391b0d754372786842dfa4aa50206719269a715b5753cb6cf0f2d327b7c +size 42815 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e32c163a7d71b59d7f94629c4c80021fb55fff56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74c01fdf2d937c2e2a11ce1e4293585d9bfd0093eb2c553549ec5bb999f9a42f +size 143762 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f3a221dc4165c47813809d349d33edc178895f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8576d0ea88896c86114fba199da2a37c1f3a002d7b0b4acff3b55981e0ee8b7 +size 71100 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8917cd52fbd965cc8a84749bdaa23cd25b236e8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d0e230f13823bfb445f6803968fd6e98a8db822596ba001f03c884f3b4d169 +size 162035 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3210ac2d0b3d2159646e54c07ea6f2ab62772c3c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bae8b62b228d8154d4f8f192938308d7ae770673953f4d1e122d17a1ced0b9b +size 209379 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0127cf5b0aa9db2df374554c6a380be2c0126e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40e64b6934950f76f92a724bc45266e4a5222ed833069aa8304d4def1f8cb78 +size 49864 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c19138967a8913ce627a8fab546e4b3e8e9270a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71cceccd2b8668c32f91a4c26c1a3302dd56df47dc5cde7d634ca246e3692e5b +size 31485 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d457bb3650bd580d5ad8333c8c1c73fa60b58c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0931d5216efb8c7d63414444ca9fafee2e3a91afb91d2a5a3f97829ea7bbf1c +size 36027 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..500aae6550de03cd74eb73086380eddd7c2dc1cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a83b6124b9530387641a6f50bdccb1edf0fbce174701dbec199d018852ea8af +size 29458 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25b7146367c0a58d3fdb43d0466f34a1a27466d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc181326587b8dab944e17b477c0022e798c202fc83fba70ba16c85605bdfdc2 +size 40284 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a385809172d86f3b607084442ccda7fbeab0474d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd13f7b96890da96414817212a7955ccc47df6c21ebe9d8cc47b73bbf979e1c +size 27005 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db546dc42129e6911f207f5e64798079166563bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726be55689ee2afdbeabee33953698e9092a2176db77a5c4a888e0b583a5841e +size 21715 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81de3a07b5018ce10887097c3d6b36d273774120 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5745574b28fc6a73826533a5dec811e1051ab13f9b9b439baba8437a2ac0ea76 +size 57349 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab474eeb01344335ae2e9abe97d03c9cebc0bc46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b7fcecd7a078498027d18446c03d2716c6ebe4968a353d1ddb341b615d337f +size 22226 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f4dd335669e7487fd0ec0594d93d2ce251ffe22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8242886e6a02c1f0e6b373dfdf9a021e7ddca813009bb983b761b46aef175254 +size 169102 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f76a0a0a7b6d8ee490957b33f8e2642c95ace11f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bdce9b367b3114dd721e3fec3ddbfdc99a2ba67953a873f6e1411e495597a2c +size 90176 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0daa22eba3e678ea0ba7654a7a7975a47a00d421 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1009b67219394be44bc0b62072eec481b60cf2c2948e122caf3c15f629e5109a +size 154289 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b97d5fa99dc5bb414e4fdb9c35ffd0d81df87054 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f037ffd39a03289a0a291741f2274e2a4b248d2b60b921630d7d1bed79d7712c +size 79373 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f26e34fedad6ece77dd6d5ebe154c478d16221a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee975d081804ca8845df88a942f7c51965fe65bedc9bc9004ba10f452d3fca9 +size 74302 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7035fc694b9069ef3cc11082e0d1e54e3a9f4df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85aafca32766a765fd45457ed3ddba10c087f3df7cea6fdb2c75e583983cd5d3 +size 82470 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..deb5832b4a57047754e29f65d38ceb4be0000eac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1353612808870249df2837eb7bb3cfb5be94a1b1e76b05aec17f70d1b401dbd8 +size 89842 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4f1853a8cbc7ef8cae37f06faa7b4081a784785 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6119be8785d90d6d4274822705ec2213ebcb810c48a3794906de8b64ee5344ed +size 1004291 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2bc9abd98ccfbf1b022754ea70266cf2f22839c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f4e68bb86d8ab1efcf6208948d9434c7972ae288a55b7a7b1972357b76380bd +size 132909 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..474185267628466f9c05e489c22bca4fed85aaca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafed75bf3943e4b91268d2fe9f0744eec37a5b65ef7d86c7e6b2573de412ed5 +size 182873 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..101f9374635d8907969f8e4468db933edc99cec2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb6a96b03e70494bf43daec6ecbcb477214da1644a832f4f0ebdba6a08fbdf63 +size 27507 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a60751abfad701d0311dc4a97c0f47cab2f45d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40896b1123488039a89ab89e07ef46445be352797e189baf08dc083954ff175f +size 120378 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f21f56cb168714e4db657ba45ac26383e36abb0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12968c167d144de48bd04c21956a56d6f0c4248191dd3ec08acb4db0812a6037 +size 57939 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21b45353b129ae73959a2282196136e227482f44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ebd3dba5ca3ae86d1ef56f93a8adcac4e83e694c12eae1ef0ac555b07cdaa6c +size 25398 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f281cd36547d4496489fd082df4e3b30c0d60f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4cd4a3ac6b16e18a9b5361775da0800ab8f24625044af9ac92351cb80622e58 +size 39411 diff --git a/eval-results/mmlu/0/ckpt_222/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_222/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfd08c923007f3c2f68ad9437390829ed81af732 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c6ecca78390fa027402314470c48af5c881654f18fc3a13299e1fad11a231b +size 32876 diff --git a/eval-results/mmlu/0/ckpt_222/results.json.tar.gz b/eval-results/mmlu/0/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d01f5dcb8213dd0890f3988686553ffbe0d6357 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d76c5b0338b1394ea71abe72576052c6057e7c785851c4531edcdbe80ae5f098 +size 7627 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8dc06c11f7f42e940cf190997a93f10349593e61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adc71de7845451864faf3946fed4961fd2987a03ce370f501b72442cce304a8 +size 17050 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..635f3ef7395cc7583219dc3061cb53a56a990d21 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba160c078c7ac5f74b58d3f189c5a4b3ced7c2c9f23cd873a366ecd8e523727c +size 29821 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39475563c703e9d2534fac827bc2b528dfc14912 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1ceec5b08880cca4ba7c2c4e312ce15d40f518976130c76dca14ba8a06bdbfb +size 39823 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..961065f9b7be794278261dd64e7003a7c0a64f33 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88e453ff46a6de5def2a0d6a9044d2f7b2a984f163e99815f2d354ed83b7d92f +size 26775 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93c1452a5c7ec969087cba63977350b2307b6917 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07aa2bafcd6f45a7b028ff089ae78217c41e903e7ff8e6dc3bcf9c00bdf2beb +size 61179 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae653a59da721f585832e1da65402eec40824fd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b52b6c252d05c1368384b2c8fe59f9058b52aa1ee947843400be3b92aa24ce +size 40401 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d455120e30d8fc7eef7c326348749236e42bd93c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e11691c54940059da40bdb4361cb00316ae49ab7513f7415675bb1283efaf4 +size 23807 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd389e196b567db848dec7853f3ef036bcb27734 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd9875074663b961f60cf1a4c538e52b031291b4bb2abed9651a19c0f0afb7c +size 31109 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..209a55b4d8dcf5d380745276e906c2a5375752f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07632086a686c493d5fd9e2bdb41cc2f729e310c0d20329ffc0397f74496e21 +size 22870 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ece7c5103b295deeadc784955e84d3300453267 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659ce297914046824b15be0ddfd2d631a7c9a0e47f4fced4380ae5ff7f0614b9 +size 60927 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3fc10419b59fa68e7dd5d75799a088974988d98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d93ca3d17bf5edc2b2b095e3cfed80c11100386b87e8603cce43fa470997d7 +size 25737 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4e11bb08d7e58fad662c22fc6fc2a77818b97f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6bb8ca7f89489e2b745d5e58ab2744c8f79c659a5c9a138d39db09b84385c2 +size 25771 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55b1f357a5a42492a64ed833969fa09a6cd0ce43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b506a14cf746daaf799472148d69b09d4bf6fcba5608e98c0c734670a9b8f50f +size 46487 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebc9809cfd5785bdf08127f7e66ca472253dfb6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bffb9edf6f37bd019b5da0fba103625fc28f2b63de8bd9cb5e641e32bc60865a +size 31495 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d55a65e1b105cd1746f235c8dbbeded24534190a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fcdecf63ad56d64e61edf80cce2a4b443238cd4721d0b1c9f21e92edb0aeb9 +size 28799 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28274ed4599ffbe1d652f9724cdfa08658309ab8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde053da548111133c6795a07450378046f29613675266909ec4550d5d05d4e4 +size 74513 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f1ff5f2e22d195e648678c752bdc7d87ff9d18f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c56ca222500ed8d7f42133e41083087be91d4e9507fdbb19f8aac65dd5f641 +size 30066 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4e49ba9f1e2f864e9a13466b7c9d0e2a4a82944 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb13188a60ee2aa122edff5b65392ed0a7df215e6f847646fe65a8da45c8450 +size 19128 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb1ca27d2a4c116649ccfeddba49113a12fceaec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5141079fd47e184bc3114076aad6048ca78bbca91225f165dbee9e901ac8f7 +size 87958 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7eb1ec12c394f152bf3c3685500b8444590d604 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b3fdca4b39b187d4d19f1f726d9b2b46d9fdfce8642b2585509e96774781ea +size 49978 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6be5f48dad76e57402cf5253c8a974e862a54fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4eb74d2fd3676c01744e4d3205231a79fe258a88d94e911f713402c5d24493 +size 31603 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75bcb5e705d2008a34810648c856b17c345eecc5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf95d9500af3e17cdfeca174c6a1e2a6c078fa23efc5de2d53fbaaa6be5e525a +size 145495 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c5a917e9b845d2356993ffa12d6dc3dac03c7ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1a531daf413bc0c7101286a4d2dc8d728708a703ba6ace2742bac040acb194 +size 44550 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3134932eb369d3983b4c3608288d5edb8776ff7e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed160a9e72567e43a5f95df5e3cc425c1e102b85b9f8dd5ec1eac8e23679eed +size 54414 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..283eb8ca5e4f672c1a012881ee17322c1b8ec583 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b472e2d252c10b026b8acea08c8893c129d6e7e89717e4093a1521bb7dacde +size 92711 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10035d68463c7cb7db224f432b9ff5d695ae6ae9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055022a06b799a90d1a3b6974ecc2045623c0b5626de61686838c385c328ddfa +size 57006 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e60f2b30b956a0af603c12d804d097fd9333286 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d22489b3cbfe90bb49d512ba1c602a3deab64c8c8c9b95c23bb394414b746965 +size 58425 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2edff94f39d3d9090cad3984835f2ebe765e6b8c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cc2a633dda50f2a8c6eea88c8dea51d2eab0fa06fbf4c26056c2ec234d12cd1 +size 42895 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eda7e9a22fedc12a4a5a9a7f8f02fafbc7e81844 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21d63f89841611211561f13a3612117d09d3db184a60c88b1061eff26d28edb +size 143858 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bae44693800733f27bb39de374a6a9236ac1269 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0224e4afade07ea4cd1bb95e8035eee96a252aac14218a24a5762fd737fb71bb +size 71166 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2ff60884c5ea389a4739aacc73bba44bb2b7f43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96bcebd18be5de891b76b40240e737cf30f4740a2365b3a43e3907cf66d9781c +size 162042 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5dec4a14dc6b50a52a67bf3b4342b99ae642b242 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7edbfd2e8a464d827681d46c00edb4b8f8bcaa2b2178ca7a523096bed2c535 +size 209528 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3482fc0314302237df745812fd2aacca489e60db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505f22ecc0ece623b5aea2f1055567408a8e4c9c34beda58e7b2d367353a55bc +size 49932 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c91bbe3e44243afecaf785513ad4f6fd0858fdd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b76842e64940fad4a278988fcb2521a794fc458cc602d56dab484362ca2ba3bd +size 31527 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6874f2f00c93fa85e82d1bc5c6699c69e481fd9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d8d1a7bd75b5983e79bc1c1caa79cb72fdc7ee6075183879a1a56e0296535a +size 36058 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0e80e8aff3d7e9a124c6ce197a3e917ee3e6645 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8ccb3a69959a32473310cae39bc9c409ae6ab0340cadc51a73f7ecd41785d1 +size 29451 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e41c7f37ebc433b47159461facc2d47e046c991 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3fcbeedcd2beddb06a0c5f36c663f986afffb24096bfbbfc2310ed9526704b +size 40331 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6883a1dbcfcf5901d9f31026067bf048bb6b943c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f56772f10dee14e69168f5d67b773d9d0f6f44f4013a628970c54aaf17440d7 +size 27031 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f776ffbf88785c9f7a098ce0ff522ab747cee8e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a59303457a822df0fbff635539c3924831d2d3a187250d991b4b364d95ecef +size 21718 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a8bdf0bf42ea144c915e36a1e7785a37be64a6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eecd740068d4ae6abadc06c1f116f31e038505d332c17d1c1adbe39595de1da5 +size 57397 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6179d3f6786d759e94bbbd14b03168c5eec83696 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2429a081cdf650ff1ae01a9cee0c1a4d1df739241ce891083391f271443a1f4 +size 22252 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2291ee13edbc72aebdd0bb5d40a8f6425345f852 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1457bb123307fa12649e3bb52c868267988f6d6c8d48c338c42ab50fc7711a +size 169162 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97d677f45769da23fe98062ca23e39a0793bbf9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c44ca7a526639b04207f7a9f33d4c406bf17c77f720070f023ed4699d3120536 +size 90231 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84e65dbd6af707a5bd026a2786449525f1587d98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d55bcdf671f16e49cd9eeb44dbf80f67dc678329bdd9525cb560d507cbdb08f +size 153935 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b04790e66caece71508f73361d016de70e4f68f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e285d3d8360592aceed0621339b7565b0a1591e87b6cacfaf99a8061765438d +size 79416 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..881b7508951f5782bb295b7f34dc2aa8f3ba5690 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5292078653cb854b14771b835f9a5071b44d1c804e8532b90879162f65d88bc8 +size 74380 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..874097036bd73a9e248267fb513bf1907f9f4c18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d558eaef7d92c6618f221a2500bdcdc9888178cd52a7d2cf671692bec4373a3b +size 82537 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a11853eb78300b4a6728e521ef39c69b74d407e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c572d648006b2e66cda8ef27d1bf855f2a6740a056377369f1520183092024b5 +size 89897 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfd81911fb8f0beb58621bb4a8caaf44ee782a63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c9400d19d093494da3d340f9730be2fb5ca92065f4afb7d1a849bf90aeeb107 +size 1005155 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abb2f1c27bf37a5ec84f072a6180e8d37c6b188f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1faff50a78ef84bed5cfb5a8d2d45dcef2a30b5a7b678c2cf7f221ba6f5810b9 +size 133059 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63835bf1e45d4fb56b5ab306b0ec4686224bf6c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207a81bb75e110e0edaee1efe33367e9a8d394bfede56324429505aec815112f +size 183007 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d44ff2d2cfa5c2a2caa68d82d2e5c6b3bfdb9166 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5087e1d30dbd199ffe462aad5f977d9f179dbb660a40a91fe8bd07ec9f5be5ea +size 27525 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7961df354b59b0ec66f48aeede9f70a2901f188b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9082cd9103fe5a0a83c5d826704a87ebe1e60b5ed36d68d44c787357066e8091 +size 120499 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d207f9d0912e2c7a777a73826708d9cad90f5bf7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1d53b2df4644feef217486d01afa9006cd1bb5b43e9a81a6a8fd780b1e80306 +size 58043 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..21874fcb9689b04376bdeabc48b4ae56ce71859b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4b500d9f35fa6c0a69d2991a5943085665e9f18577fd419abd7994fb70e801 +size 25404 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06add15fb4f4dfe07624cd05ddda1ff2a8b5d9ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5bc481a21b4d4ed3ebbeedd6bf2f6051a7bc5568662975391026da70ed13dc +size 39480 diff --git a/eval-results/mmlu/0/ckpt_225/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_225/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b7b5c4bb5fe8326577eb2818698f3375871958c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb8173438cc0344faab241dd032f41318c3f701e27e09f61494813f29872e9a +size 33001 diff --git a/eval-results/mmlu/0/ckpt_225/results.json.tar.gz b/eval-results/mmlu/0/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..759f07f9792c8157990be4eaf709ea5ae9f3ba59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efd2135e35ca59539307d430e4cdc2f83ee6f6a036cac22cc89bc81be3950c4d +size 7599 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8883a4d2192275f7b5930dc1b61a7fc51f7a764a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93e8488f67d85a88e5d429b95b782bc010cace633e47de0e59ea25414c5bfba +size 17047 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5918db84d54f3d3c7cf15d2578e2593e15d3380 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04fb423f188ea2b4ecb1c3265733509768d9346144cc1686372531fc9487a0d +size 29829 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2691631e4ebaa39069e35356bdcf61d5bf418593 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c130958acd1defe412740bcbb8f9e256fe646b8ea9a663a122764f6d2021a0 +size 39812 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1de5ae82ccd1f4755f7c588612ff9eeab55dce54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778c773f0ae451650887c069ff80e851d84a1b252793108ba7656123ef6212b0 +size 26736 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97a978595786b62649a357264c98fa28cfb7711f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5babcf92857318a10204398d18b5fc94e3f7c8aefcecfb3aa885a67ca96e12d3 +size 61255 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f2dfb5d41308805fe3be89c0ee017ced4ff55cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b5f3b74372495715a08a9b9004b35f133226957349e15072b232d5691bfd21 +size 40365 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..355769a2211009223c1dd80d9076bacf9ebab0fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0602395b8bb76242b234ab566c35e1e21660548eaeb4654d2e0a031a55739589 +size 23781 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64cfd11dfb99c0b7d0515f964d1c93b2ab9a315b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90ab6722599c1bb6de03060e6b30e58bcd419fae3f84964edabf9ff1526d13c0 +size 31077 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce55de1361bbf2805e3ab21132a584dca2879d0a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d35c073138627c93271f189ba4351a498feb5acfd82c7d1126b6f0ea84dcc56e +size 22935 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b1656627faf30dac6599aa4b6ff809f0d86609a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43aad9afe9de459bd6cb0c08d1534e44f94581cd08ca6632590ec61897ac165b +size 60958 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5a252b8700732450a7013de1e433a01fcd5cd83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b2be10144199287c944b5f2e17e4f14752f6ccca239447cd61f8c1a366893c9 +size 25689 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d610d03360fa54f82088423fe97259d5d9c3b837 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a1d81c1662b82d216f7f4838e1a1b49cec904adae79afe3832dd12b8e0a921 +size 25771 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86f0aa7289bdddc75188ad8b71b1130e376f913e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa959bcb366a861da5364dd28d548d5a1efdd89cf8706bac9cd7c28b51ed2688 +size 46475 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe26be2d4cced2581af7693b6f80bb30ced07017 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d213d3397ccf0d004474559d8dedc92da667674cfb0a9385d619e88869294288 +size 31478 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e26eb93ae89089f0410ed0eb9e8d67f2a42a469b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f8e669b1894f7198bb559019dc1a3b25b66c4f5100c066591fb305f23cd332 +size 28750 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c8bf271e799721c5bf8ec2443b0dcaa9e3bfb95 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec7dd2dd95f8a2a9c3fb8cbe9a142c047556b4939eade6ac53594c465f6ebb1 +size 74527 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6ee41130a682d7e37e14bd0df2b4d8683866a0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e3a42afa8934451c0aea871c5426189ca04c3df1222411a0259266999d0bab +size 30076 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72b9a94ff61759f50f9d1d4f9da406ee0bdb3c3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1279b3b1e2e90707bf2f18eae58e67a03e148fd9cb624e4c4258b60f146eaaf7 +size 19133 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..608311ddbf557b17be3481aaa1599b6882af1719 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e00e5e830a35bf00407c0e95288cc5cdda6aff0635926840519a5a5d372447 +size 87923 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9f1c699ee862193c04f7399d95a9a6ee382b4a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb80910728c1db18827da74315fc7702354e257da7fc1a912c033da0489dd69 +size 49971 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b20e9b055a93bfc17ec852cfad5b3ddaa3db5a5a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc5b21194bbf1818f0e23f24d1d8c27b4b61d8b735ab723cf068abb66b87d38 +size 31596 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..560e3805df1e7554e1f7131ead7f7a965be73766 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d504be0bcb2cc21b55d4ff80e5ecf4d2e2c8ba7986df28cc239a9166ce5309 +size 145468 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b40266a0ae530e093f994730c2193a1b2a5a15a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6229315ee60159b1fb6ee25c91366c95f3b937c885aebf2abc7d6cf29b0276 +size 44536 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3950ba84852f1866e6e7810a56e67768274c31f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7c3529b412f15a78c0e9e72ddce5d6a34b921d1e6a6f5c7f4a8805b050ae9b8 +size 54338 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4199f0ac2f0f5e1e721f95812e0c0aa4d953b112 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c09ff8df13c7f1011bece3a4e7d454bdd2b5f5dc0b2b490b1964d415a073a20c +size 92693 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e67a72ba89c32c605f9c771c31cfcdaa9fa77b7e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19847799748c46b2d23f2bc6c8318e84f28d121da0113f96b646311a577567b +size 57090 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65bcd49a64b4663cded98436dc419a208ccf1476 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e233b29688adb5fba2e5ee82cbeee35716d7161216c43263f567b5a480b854 +size 58436 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94912c63cd500649bb220db577246d239fe2c90e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d759f5819482d055b733a5b5d1df431cd1f6c132bebada867e59e886a852ecb +size 42823 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f7229c1ea96cd83c639324cf821e1d6d655c0b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836dadf985f593bfca9c02293fd19f8b01f90974b6503f2de8ea06f26d0d8c9d +size 143769 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d8c69462152c9f8c3540d95f3253530ca06045e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ec683f11a501013f2954639c9e30be1f5e4db0629184cf074306990f689d12 +size 71121 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6a629486a8b62a08ef77d774d282be34556d0fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:071abfeec94bfc24b139c90b3992f156ddada08fb31d5b8530294e459b833763 +size 162022 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45c6545385a359815edaa4387d795144966d2923 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39d4eef9f640d163cc9980dd55d80ee7fb4d7f636dd98051aa291627e587ae5 +size 209435 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9df4fdba0b729e2b32071993da9111b2aa0230b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea735d01fd0e17940ed794b74dd1f35c28c79aced03853776b0be20574c9ee50 +size 49936 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26fa481082d74c5a5371fd4017bc271a4e405694 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d0114a8c6e76fab39bdca08c06936195e09bb48f457d7b076fc37e2ac7bdaa +size 31528 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5566f1ad72c8ce0bfa0b9045263eb3b955f4e779 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffdfdf0cfadd36f75aa704c3d49899bd512b8ef0b27dfd8bb3636612e4d6baed +size 36018 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89abdd3c28cb2fa086a5f5070c842142dba42adb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd3e9fafabc2f07396a8f2522a03d8fa7381287885b75841f8f8e790e0a0537 +size 29485 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bdb1581fcd90f58ee132322908bfa2fe2147067 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e292af89761a7e7337a908ee73e86f3860cbd5b5f1ba018e3361dc8af02e7f +size 40319 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b19409cfa28d1b23b901bc6a2df7154cf6f43c58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfa9bc80cf1c54bc21fc2354e0ed360784e18493b814e60e359ba09156ced9d +size 27068 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64a6dfa32718ae3416d81c3cb9b8dd58952d15de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14578596d5a7394f4254cdfc634f1668018b06d49988977869a555f8dc394b39 +size 21666 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82eb0d1ec1337f62c31a934a5780085f4ff8d608 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104a560aff1d75156cfd0a8522ae26d5c805bd82847a335f14e588af71779049 +size 57423 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6fe7e2b0d26f0f972dd951acb6d727f8873d3d91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad02155c4de13cb7785303f6ec233d7a379843dba67ce774abb55312668751 +size 22228 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d67530290eb507e351144d371cd62a327d9798b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9b7903f71c6365496438d70ff1deb300e476fbf791202c192a4cadd2e257fb +size 169057 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28879afe14cb0f8a0babfea028288c2d6b035dc9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727f1910a5f4b64c269bf0fb21a5c899587d8db137a681c0a41ed656712408e9 +size 90153 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1997fa26eeb1e2a407aaaef50c2f1d08840f1d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f5f8f9a837ed8ae0d602060b4eed9dac4f421c3bccee01534fb856fd448888 +size 154353 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b004fff4281af2dee0d75ae063fd839c86fafa7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7466be2269f88c072752ed90f2a35c1f8997ab3af0fc881459b91bc223ba461f +size 79428 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19d96d00bb7fe2b82a22cd1635953fd75150dcdb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edcef224c1d647a7b77ecb21a2c4968b6b54166c96cab1312ff86b23c3297137 +size 74292 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb8026524ef36509d717c9ea942158ece2401479 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9294ed4533941d04f374c482cf9b149e0bb5e5a1b8507829afdeb4987e9d4d99 +size 82481 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69f40fef62a927c71f801003d6b2162fc2025e56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedd977e56e9ff5af2bc8907eacd01be930c56bfde4bddda16b308b33ce70520 +size 89908 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07cb8a09e52324dcaee439cc212245df13bccfbf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b58f585bcb615b8f53709ba1d090d9fecf2e427b46d1506d6d8313d6339f6c2 +size 1004703 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d11170876037b751e5052d1c78df1851b34dd380 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6ef2ead502a30f0dbff4d0a84c5df43d7c8721eaa6c3dad502922f5741c42a9 +size 133073 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f13c2d8422afe0760ff482ba6dba58a7ef60df57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:290243d75fc9f03b8f9bfc5fb31cc85607bbe7398e6001800b43ebd7e2629f21 +size 182908 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6915022dde9acc3e6b1d93f2b1f9c51ebc3ec298 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30134eeb121118744eb93285977c5f4b23bfb7b33a788a31b49ee642c8f2221b +size 27522 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2059067f3ac17f26f172f8693836b5352768e996 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c819fc33fb749464d282fb3a56ea18af4735d004ae8843c0e948b4b6d4d19472 +size 120380 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38e2bae75402ab57ef9d3d8301ab6ae62138ed14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe60be3ee5267422a229de0cc083d664b37d91d69687d1f906cb0cc3ab01a67 +size 58003 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86811e498e0317168ef8918b1dae97928dcb0b07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2762e7b8b489e04ed11d766abf9a6227cd7df7e445524b7e196563e2ddd20fea +size 25408 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc8f5c297eaeaa35478ad8fd40903d305102650d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e975154a6d2d305f7ac02014811353e6cf37de2b66bb3f0503decd29a15837c3 +size 39406 diff --git a/eval-results/mmlu/0/ckpt_228/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_228/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27ed8446dae0d812f5fb2abab577ee3abac77447 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b831a0d80c6bfcee32f83db8b8fae1d1e6d9c2fda61ec0f6c9838080e2972bc3 +size 32972 diff --git a/eval-results/mmlu/0/ckpt_228/results.json.tar.gz b/eval-results/mmlu/0/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1a5ccf7918cfae09b2399a0a37402fabfd053fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f8c7399c5975dad341331ba65c3dfbaf1d995ec6d9ed99e026aa0d9550cac3 +size 7652 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d5bfdee67e4ed52432e08219d20241e63d1ddae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:117df494475664d2b2ad66fb2660d11e5be710ddd63a347a944504f3a6808c6e +size 17039 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41a81a36e7f5f4c5491fa987b570927ac271d74e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:850ba9931cbf9250243d5d495ab500c5ea5613d367d418ba7efd1556a45a9f45 +size 29830 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4062836d947d328b36a97c435c181c7115a3d2ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031656981a6425b1cefade73a5fd29d9dc31364e02a6fe4fb50bb8814af53bd2 +size 39825 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9314c73f5b205fa3997d8cf76b5b3a6ac819581f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cc9a2ccd587601ca05ee5dff57df9539eb139b230add6061aa17f33dde4224 +size 26739 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..496e2030fe0368d54fa7494bf4ae48ff3a6b31f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc8c29a89c02100e1b0333f0f297c0795d9b43165c91b3477686db69786a0ee +size 61235 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..397843667b96c2b14abfd8648592783a06b2d3f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb80ba514fbe40d29100e284df66a6f62190540a65715ab8c209f6c64086bb4c +size 40360 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65994274b7794cbbddfbc3d28f784c5483bc2f98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dda718f854714e810eac8035831d092e4e470a597287da69b0a045200ac4a7e +size 23755 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71c5fd0f50e9d4eac3735029258a5f3eb23d68cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741f185d9f33013248c0d6ba65b4c2b6ec63de2e0212cb83fdf03f4aa96e1095 +size 31077 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa92b97a4ce79d75ea224ef3a007a8ff21834a5f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c434ac8bae105dfca7985ed75a8c617cba6bc8f00e30d86eead10e7c70f42e +size 22879 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e489997b23d27f2f0e2f170dc8637b14cc715eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c00c027be59ab76853119bb906db977a936e980e4201e221cedb7c407fc68a85 +size 60941 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..daed401172289c95346cb82a32e5ca438696bae0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a208a54157930969480b5bb36fd87dd0c4c5ab6dea3dd69c6b8a5b7d06f7b8fc +size 25701 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1952ab33cec059d878d107f43ec44744c9958f05 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1421a698f0dfd76df1a02f538762f457f118df4c704563ecbd008599aeb0860 +size 25774 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1129323fe601e6d61964f658dc81521f9ad32afe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad8244720c4e1edc39c5e85f207d46de160147d5899e3f09fd3ebf8866c2f91 +size 46472 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a741a54d6854dc67c3d86a1d6c48c1fe636ba291 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3207a0e318fc47edc99ff05eb4481e68b8930af137193c1572b144ece8af745f +size 31449 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc47a688cc578b9620331984d39206e5f40da1ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edda1713414d07468a845a387e940ba51393d0cd65884bdac4569ed65c357e1b +size 28788 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2eeba3fba7588e8fdb68cc6f6d0fc21da909b90f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:345f877eeeadf53ec4cbf5a950dc43aaf185b8ca85fcd389430b24af58af53e8 +size 74536 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c926f82c7821bf8f4216031d4ec53fe7f6782807 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778543d034b6a28fc3181e0634e0206e222cbaf32a910fb9ca9230bec2ddee87 +size 30049 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3f9e2463e7c8160ce5fc6adf08eb2d79d7c0f10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67660fc7d1d635f4f7df6d61f33b96a725eec39c6c958db701e867526e8f583 +size 19089 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b14b49eb456aa062cf2d24b359d9430064ee0849 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc112df9a42a412e581f5f1e3501be18ffcb50a9b4c2377d152f6cb4ef85384 +size 87942 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f75370f1b23de3e0efae5847f788519c9d978e3c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90428d315644a788ab8c735bf88635b5a5f9ac034943dfa37364fd1e15d08256 +size 49919 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4e242eb38855c0c3b323e9cfdf3c7fe2e87b7d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e340ed3890c20664f854e5cb67763245fefeee8ccf3bfb9b19284ed0b2f4cf +size 31602 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14b3df99cd40d1724c4843ea10100b38492a53cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:012313814684c0ea33dca12ed9f130cf5aec7ad1785614dbf876f3f56c0dbe10 +size 145403 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38d0fb10efd94599298734c7f07f45716587fa02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7098e3a9a47fd047a8db1e07976e180a45d60efd22c6dcfb6ec819af1fc6972 +size 44581 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bd3f4299e02ad38c4af19be42ad8d19385545da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3619429a63b0883411d3c8395397958b116d6b4471c4ba4eee290cc18d8160e5 +size 54315 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3305df3722966098a4d616a7771b41be99cb37fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7c8d487addc9df845f8a63ad4039f99696602c581dbd3f65965b2c01358a99 +size 92666 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae4d61484410f3b33d9fa87aa0e6c71067ca7e3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db16231c43f2af051e622e69cc9c064388b560eac85c7efb0e51db01cb8bdd5 +size 57044 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7aade65466b7a2d1c39fedb16271899da8f55397 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2353f7d1ddf2da05429a937a3641f2a5495377323f809525d7ec2f241265d9 +size 58441 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2dee614e53f1edba94fe13aeaf66d3b061991214 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35b0a7a74e8e30df6ac6bf44e372274a58a64a7618c4be8dcbc77c0c2939d3f +size 42811 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af87a1ad0e4336b7b8e58d4f4b554a3498c65652 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3f7813026482aadcca651f0792e03fd8f356aa614af1a5fa155dae1d7424a3 +size 143851 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86e712d6d263d533e68182419be9ba4061a81183 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77b1bf69b044346f2e8bb49b7c918651c65ad8858ea31269ea0c67ec40fae269 +size 71123 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..389ead6bc90f721a572621ac2de0c045faac3a40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ee47f1c815c34a5de5d0773cc814d611f553e6b6bd7621726292ecbeeb5a73 +size 161939 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3656a1f0df55c04b3fced8db08459690b843e2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69018615f4bd7ac71d7dd064e868f608204aa160682c6efb285fdedc0c99bade +size 209390 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..919cf1d5eef32741449e4134c654b268de5cf3c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e58accde158d719b777fe31076cd0956b558943dffe3d17a0508148e0454d525 +size 49971 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f1c049c27556734b9f52f2336c42cf371609371 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8608bf942f777fb7a5f33ebc2ef33c5726be6a76bb2ba705f8f4c0c32de28cc +size 31535 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15d2a368dd37842190d111cee854315208be910f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30434f93db4c571c177011f2c8731223757cc65c361bb0ae97b7a860fd2ba6ef +size 36020 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7b2b2eaa0117c12c1f8dec31db39ec6d3dd647d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e6eac28085e1fd0fb3f92ccdf82c3ef9cff2b6ec3c01424afe23282d8bb858 +size 29507 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bc63aaec2d7d64c5808fedae66c915532ffc663 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc598db35c57fc6709ff188f527d1cc75152398a9927738452d0ac61f4a6ce1c +size 40351 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7833340bb3e639a9e39da0c663e0991e4466857e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e28b0abf309960412782d0ca5a9ad559d9a2de35a66691a1f5e925a5ba3305b +size 27013 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d44688084a2a9b2b4e690271807b38ceaa5728ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57af5d14fd57234d56fe1d9cd7662c0544ff3b11b9ecb4fce829f137ae5f649 +size 21713 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f0ff0020a1906e5054c2fc0fda03151a56f0c2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ab182ad387966f0484d66a79ef8e5928c160e5e656188741acd9f6b2f77c0f +size 57395 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b6838b4db49f81a18a16d543af83dbfd58780a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63dc5ce2f64df067731ac77793de7b249a4d0ca6e045c4c19b84e63473a69308 +size 22273 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad95dba636dc5631b25e9abd98830bb1c0d42763 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776aef042537e883c38c90bf3adb85264a2527f6b19e3a7b2041da143c42f579 +size 169201 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae26cfb3a2d054c02806e4dc1f7bcf0c469b94e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503f391046dacacdf0f63baa4d1071b1d0637afb237131d788e5c4d96492fa14 +size 90123 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c3e7f542b0c5ebf3e5f6a96c4b0ac4a6d887151 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2634d7b3e2325982bea478099e2e4a99d2877c176be4cc0031f524aef75cc9 +size 154406 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39b0222b3e34a1391160adb578b7da998a97f5cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f9a6975b84a3d92aeb3edaad03144a54367866dfa2663ed70305cbd0adcf6e +size 79382 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5681fd892a7454d23fb618468f5aa543c77e1fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c367c68ea45578483afe98801f1cb2198580a11115a56f501e73e01ac5dfd5d2 +size 74304 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f9ccd35963c738d2772f7695469833066986e72 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f860f390731914d083877324ad7517b7907f1283bb27b7ef178561d13fae681 +size 82564 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de9c0419fde4487d589c483dd2815390c747401d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965719917e4bccb7dfb0f717cd70088ea8af9d970e4fd0da0fb3a5ad82a85572 +size 89860 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f93f1240f7176bc10b8048b00bba4394ea3bf3d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3322158770b9450cca0d0c07e64dbcca695b32d87e0b63880b380deab82eeb93 +size 1003870 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7d3a19bf459eff776c3ce14e8f677090fc2b487 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3901653682c4bdd6493881fbf734dc2d2c7605dde2f4ad8c4cef28e30d835e +size 132965 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..053c7229e50ff3c71855520707ef3d69a63af7b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f1290f693297fcfbf035e38f0329589bc86d6b9bdfdc5b6dbbb5d56d9e8551 +size 183019 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ceb41c6b58d38fef5c8d7d7fbfea12c8c6b250f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d66501039a3346f327e856fd3ddaf3284b332f53fd705a8a96fa40cb2878141 +size 27524 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ecb8347dfd85ba7522a510de95de98899ae4f71 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64db3141fb7139c03f36fd3d7c50953946ba5d96355f868b9415836b9c96baf6 +size 120425 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0d1642f5c1b1d9916e8a6d937659197b7689381 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc7f35f39878bf3c3edae4ef9aa2e2e35037ac159cf57059fd7ddf650e25c09 +size 58028 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07690b122ccb7e0894f65c6dbaeb4d4ecc68a8e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342d9feb3a230d9b3e149fa56f9c48e435c50aa43c877108751a921c0d21d1fb +size 25418 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04aa4093c5285a515801941fb65049d1b5d2a7e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c62b83f1157553cab22a032829b2808c36f49cf8f8a4d54fcf20eeb7cbdd07 +size 39448 diff --git a/eval-results/mmlu/0/ckpt_231/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_231/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5778a07705cb4f03ac13643870f3cd81581e887a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708b24a247f7810f364b66c2d03dbdf4e25a76c0146f8302928c0cf4179b7aef +size 32926 diff --git a/eval-results/mmlu/0/ckpt_231/results.json.tar.gz b/eval-results/mmlu/0/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a9bbef058b52c7c6f88cc8b8526f31968b4f22c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa91197b21840cb41eeda984bfee5b5b811262387d3acc64398151134800758 +size 7583 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d9eecfac7c54b8fe9e29136ef1e1ce37b08e58b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79946d6f03c4bfbc2d3f8a88aae470e5ca8d381b4be71ca8fbfaed80a1be7bf2 +size 17057 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a351f1dcf25003e8568837131915147fe921013d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56fecf831e5a83e1c6c87665865c128782cb9f8799f05f457109328952c4bf04 +size 29807 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1b161c110beb439d570a69c68dbc80f002f1b92 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42938a1ea11d55c71c898cbaaddf920cace32502d3ed30316d8e06e46e75a4d +size 39839 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1dabf3e4136e176706342b0dd41e59df5d4af638 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0621dcb3d3be38499ee1b53a85c4bf132fb64af94699fedd53170635b5bb76 +size 26768 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dea5240db954046a718720153d8bb1711afda9ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33be88fd3598380bf48062c3d7f41afb20076e25dbb2f711afce6b0a4932d0f +size 61208 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..833a4ae6542c8b80f0798f873f7234bc177022c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:866c55175eaf71aea02db4e3e5890cf5c30d15a58992e550594c290266c47ce4 +size 40342 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2e593c60d7933662232c216ecc234d38d43e618 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a52133891265d5ad3d746dfb01c236b22f0955593d99da87eb7c8afd03da55f4 +size 23802 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76ee3a38bce7147ad9b5b57f0ee048a59ea137ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29091e24a2db04e66e555a061dd878863db1baf9cbbc26e3573a4ea4197c755f +size 31134 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebc4c05dba86ba3e06c7a9b7f7f03368528f6206 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ff7be21624fe2019a155790e4a502504b63546e223a733c758f88f7dc85e64a +size 22882 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba512e881fb04a8bc9e8e71b3a406966dbcff389 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b73047bbfe4034bed2876b233b1dc4898797ebc8db64a3b9bc187654e190b36 +size 60940 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a3ee603b176ff858792d879b0696a4533c2fa76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9a0c2350e43466215cd4e312706a2bc5fdc88f8e37a3be68e5c30ecb9d10dd +size 25725 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52fa61404b6a8477899489be62615039e6f8eedc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d844ba19ff170e44f0af6dd52898b002d31ddc8f33086744909da5201eefdff +size 25799 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a0539b831df663bf6df309348fbefcb9073c26f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb2572e628fdab85257f6fb38efafbdbd447772ac5ac63245d51e8fbf31ce257 +size 46530 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f241a9d78e76fa848d3c2724d59bd8302082fc31 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f27d05704421e756673a898191dbe19d21ac0062c131d0e88d8e6406b6f5651 +size 31516 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26398984553fc843c3e080094a436a47aea338c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9a2c85a3ea8a49c4ed1205be02cd9cd732e0fd4b6077b249bd5c74e919300d +size 28813 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4c7e16503f975ccf489542ca66a1907aef12db7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57003c7714437597afc9271fa31bd324e3dfc89256fd27d043ca46159543ac5a +size 74622 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06301154d9988bda867b52b9a9f809df56e402aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0258007983f38ccd28328f6ba7dc3d884998da1afe4170c0fc3734541c4d06 +size 30105 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..974a28c13b1c19ea2763bdf73c1ec70499566071 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956b650c89dec76e0350680d3cb4a8b8f3da993d302bd93c8f127d740242847d +size 19138 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c8df3f51dd803e46e4baee4f03167074eb64a69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f954a9c0c14e88243366b0079dc27c1e90bf85e096fe035d0a20c66da4b7e7a +size 87999 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ef662d64033f7fbfa53fd89ad80f50ad92a98eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dfea637f5f15205b486d6e715dccae74f927e6b49fe78e906b77685cee4a088 +size 49965 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da45ae95d775b38805a797f354679f99a7384805 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c968abc54fd58daeceb79719e860648afbb4c8fde381931df2056548f213c750 +size 31585 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c027d9006c8845d6095e522876802d8678e4dfe0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634122b3d42af9a9df982ddd05771c4fb45e4931bb429aac73d3106080a62665 +size 145482 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abddd4a609ef9ceca6503fb4541d6a14e52e7e48 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa1104d66840f6afda889874383c0003fda86a8fe32ce867668cfc6b16688bc +size 44529 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0802d3131ccde067647a1309cf11769c80134b51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e9f8819a40b373251a8679116378fd85b53b78ca59782bad90b3a924c28e3c +size 54364 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89d21d023671a59850b73263f3a76f05966e88d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cc79a6447a66232a6e0fa2825ebbdeb4234a2226262b9d3667b5f1b71cc583b +size 92784 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4a572e7a533ce041166756e73f32aca0bdb0a7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa4468b6fbc59ee9b0db8e379cfcb3d8cdb407fbb6fc6df3868634114e14eec2 +size 57063 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0beac90b3c05a5d809a2f62aa1acfc0f9d1e79a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88027ed56df27666691fe64beeddb6e7d7b0ff2e26b8be0185428f168d8ba399 +size 58491 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6bd9e1f8349502fd13ec4be7709a6d95022784ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e457b43ebbebc1790940cedad739dd1444533c1655b7ea05709e4cc8f1c9722 +size 42861 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74ad132a0ed75b361c983f18d6639118f765da3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d043d6a541a24fc2412f2c64fa51012eb2bd88c883eb7fbda9a9bd8e91be1256 +size 143880 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52e67bd2988bde9a89e2187acf169f43071a3bbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90ef40abba6452a992a7fc5621f7a48265d8cceab98b4de04b51fad140362fc +size 71177 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f9adbd852b4c59ab732a1d45c0850e2354a3c7e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4630d2c6adee2f7e82297af7557d4963de8cb869b1c51b5f3e0168f78cedb685 +size 161948 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1ff4388d67827495f1ec2af6b63b8b5672172a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3fb44889981c801623cd22065e1ad3265b81d9a38ad7ad5e65c7efc8ffed764 +size 209454 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca2aa011b79c77241ccbc0d0b7fcec67fb5ef17d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ec941aaa4ab8b13c0d5cf07c25ead5071adf76b231de6003cc715731b50587 +size 49888 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e9fd5ab1dd788074588e5675d8152652a5ba687 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ebbeaca712d295f4c631a8efa73e7fc43f78961dc1486fd24e4574da1507d46 +size 31549 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7d02c4fda249d8a1483756fd6c3317d41f8e393 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d266e6bb1e827e1976f661de71c9fdb5b066b48636508a154545e3c4ce667b0c +size 36032 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3aaafa482bbc5ff945797ee86e402ee9c0cd457e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907a5e5a572f661ed3f4b3a4d4c844578b3fcf2dfd407860f6c772c11e11b278 +size 29454 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74d78471edb125d8b47c158277e887e53ba2cb7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadd6997c7a9072a976ae7bcf4b40fcd3038a63a45f9928e40759ffb9f8afeab +size 40335 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98050b09e49cba1c8aeb11f3b19647a8944afd2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676c1331ffa7d4ef52496d0596f6b657effd9243848fe3f8166731208d3517b3 +size 27038 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d87718095b201c7674b60a5ff8081f406376ecd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32387bd47eee4438aeecd007c3818b88a0b9bf1df7fc971ab551760e7a67e2e +size 21699 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19d9f58372b786cf4b87e6ace0ad0679ea7239a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5204b3862b38b204be53a771241a6d29c634c692f715ad8d181cf28685326d3d +size 57411 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6fdddee23a1d5f44b2c0ab0ae567800ad352374 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2636fddc1b756a5550c4e63d880c0e29dd0998442750a661c57308bd15e57a5 +size 22252 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cd423959e8991cf0e9370016cde1f26d2a01742 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b051967ae6a9cc3894fa07b1a75c87e22896ae49a8a8ef1c6637f0f7afbc4f +size 169173 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf81fd0375c572e964e8f58f6d58d45281fe6eff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed4bbd319c8d478a9869ec3145f1015de49dc48af727c75d31fb6357c469432 +size 90242 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a6f4e55b358aa4bd5ab16d1343d5ef114b918c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4bf4ca904785ce195f6cad149f844434227afe58d7ac4c7614afb7a2e21a5f +size 153873 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cb9f25083428cc6e22ae2213c80da2de075fa02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd4c8709e3a30b1ea63b80233651482aefb72b445af5646805bd62439515bda2 +size 79395 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72cc70e1f351af04bd91dc94380bbd32654b225a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a3f7c935ad7fdd8eb4bdc9ad2ade7feec887b5f8611b952b3111fc3d0cff79 +size 74322 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5c80b2687d3138bc2e153b39d04126eec2e1796 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95eba0cff8c76932a358bc5087a26a7a13775ee76fc9a45a9087e82bb279e716 +size 82537 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f3954cc1a1be0659442ef6946c61b09b447e5db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b742f3e6e8bc252d78ea4ae9fa0897edd18625bc74229e0c0c7fca500c2624c4 +size 89917 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad54f52af8b403c8bd5d2047dc2b844f1595e34a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0a46e2a01bde56546be245343b71f158ac4fb41fa2055b92fb074150886622 +size 1004156 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..666886d51530a1fbd6b69caa2ae6852128070e91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc414340223a79ec0fabd65e9c3b802d2ef3b96b0fb7be4e113ff8e6ee1cbc7 +size 132902 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ddd5bc11d9b3760a1dd5e4065faa24010f4f306 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e08a29a8a0f82b7c00165188076c54c2d55cf8d6ae751e69840d358471884a +size 182942 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fb18be89081e061516420cf2d4f17f7df54cdb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e18b5f1b60f21c3902abb90a165b5505e045a3af434c450a4fe03be4236eea +size 27522 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7db4453065f96460ebb0066078d919262173b900 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de8e82c3bb936c7a959eb4048eb668efcc1d3e7baa69f9ff4dd02acdd1a5094c +size 120369 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..675309afc367f93eeed7cf6d03465f029b0b557d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8978025be404b979e5656f379a4f69f19f8751289ed80c9b0a5e316d3ae3ce7f +size 57991 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..879971635bd83c04f9095e55bb3d47e9dc44705e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5f4069808007ee9eaba6c42dd913b9b07337d1596d61c191fed53d4f789e007 +size 25445 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9035f5cbe2d6e72bd04c76f70e4c4ecead774c6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e77a53b2913a029a23fe19a8fc43cc0de00629f6948124fa0ed9d9a83714fef +size 39445 diff --git a/eval-results/mmlu/0/ckpt_234/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_234/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69da498850ac770c6b82cc0f096518db775e5328 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d4f1f46786b331725f3c8d14025c8b496b66ebc1c7ccc0ef487340a79346e2 +size 32950 diff --git a/eval-results/mmlu/0/ckpt_234/results.json.tar.gz b/eval-results/mmlu/0/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c96262cf61699f54405479415cf1159e20470a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11f57629d6bbe162107192fc88315f155fd7c1bf2e6accf965a560d9e5783d12 +size 7622 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97d09c6c3683caa5638fbc1bc66a4c76ece7926e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c180b7afdbf38e27787923ef634e3ce1092d8d7b49fd4aed2468d11e2afa10f +size 17101 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23b1fe90aa71323eef37a7d5bc859bf19146a52f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2689dc03a0471d621d56c0eabe71d4965062e590518f0c3cc8500f91c02dd44a +size 29824 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d455d1809d3b74b71f7618b8d00019615cf46aae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71c1ea29572c643ddcbbc59a32359f2e794737dac1c900ff22f724953fb6e4f6 +size 39882 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c272620cc79a533f2b198f0e548107c1025927c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54216c9e194eeb658ae8ecef409f523695b1d0a1a12531dbc367b199f07f3b5a +size 26744 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8672a32ed8f86450bf82c60a4cb3a9f5a83fed59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba68e2f1e5fda8caa48f52304c50f05a34057b25c45545e5bb4d8e8df977e57e +size 61252 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73d0456411ffcfe2d5e803dd99d8180cd790ebe9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a22cb8aa448800a1593b43853017c0e3280c5c3325a1b39b85b2752166d0bc +size 40386 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1386a0ff72e29230ebd6b46db8b8644530011d8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d99af2b6f17099d6e1ac5453df8c31188f4fb20bc5577c104009bf8a91bcb5 +size 23777 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6152eadebc0d80a9388141c8b838073786981557 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170f963910fa675141114ec6682677363d2f5cb14031cc4339e004ae822673f9 +size 31078 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75875c558f425ec9efb48fecd62e64dafb0e0118 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f4779324bb520472b81a01ff5ac5c76aff93636fe54089e47ea94a2439e986 +size 22912 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..549a78aceb4748e302ee6a60aa6c962ed6727580 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c68134bf413a46a81a06c424d8033f8148ac66d82010db08f44b83c43a2437d +size 60976 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa77f58ba914561a2c461db3216138231c6029c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d114fc7ea370f1882d461744a3bfc26d131c1910edff2cab6aa1e1adb9c4b0b1 +size 25709 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6088c010ae90b7c829256444e9a4aaf38b276681 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79062160f7c734f2d2b3d4bcd9b5ce5ebce5ab09b66f06bb8661fea4fd55bb7 +size 25776 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d799f7ec58bdb6b7e6b46b6f45db3bf950b3c06d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e742916f85ed2add8158dba7436f885b4f7f018b3cc75f4ce1c3a4c110dcf4 +size 46566 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c905d95c49431c9a68386743cb89eccb219f6257 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2030f6acd108d39eafeb0f276bcd1bcbcb23cb5c5d7cc8fe99ea9049483a6e +size 31474 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf8be883f0977b4550d2c25f58fa35346cfa855b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ff29423f129bd351a6220b6add2d6d455b3950a8a746f74765b02381e502ea +size 28797 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c23b7e6e60c3756badc30f4053763f2c0c2487b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6141c0209746c16c7fd5a2480508f6a516c12e2aa837a37ba9d404974f2769 +size 74505 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a016ede2480ca0a09c1b66a4e208417378eca0e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0c2f48f3cf3a9797c0b5eb4bddb335f1907cd1bd2e55765dcebdce190e8778 +size 30081 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a98ff9d935b6eac184b1e96a1e1d0596d923d76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:029ebdef6fc9d162adba81e418c8bffb0d03334c9dfa382403fd8c99cb7b371a +size 19117 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38a6fcdd72dd85ad70a0097021a7bbea0d773a28 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16df7be94bbaa4f54b9cef3d20c2d0113c83c3c04d52be5575a6901d4ebb4f48 +size 87905 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c136672e284c5d20381ef80ee3f7ae12f8d493ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29c0ea6b6cf2bbfd136f6454c1936d40938525dd3f9c91db5e902442cb9387c +size 50047 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28ee71f4c7fbff0849276334055f5d3f12d6cdfe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c128966594490172e6d7777a11c21c7c8944622c0f6287c926ef133f9c7cb6 +size 31603 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc14055906ddcc9f4bb26ec65febecce56d99bee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3f3f5b54a89094cb5f06b4f7318dd32a002cb630a5ea251b9176b79cc39c8e +size 145491 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..162f3d91d260a2ea498a09199b7b8ef4ce8f604b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1df41d8b53ec1cc0665e67a03073c99065ce222b1d87580852e4bd3fc07d1e5 +size 44579 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2369fddbfcc8b1666202993ac91a6e61bb9dfa3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41cae4f2e91ef6e705b95f54978e45bf39d7fa505ca5a30f5ac28f9c36351466 +size 54408 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59928cc3cfcc4a0119147d35f3d670424eca9998 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23fc70ae60255ec712840b3241ffec8650ac48d680f717eeb92154fc7b89f1d2 +size 92754 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f77a5a1a59ae6987699e4aedbe3c905c491254c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2e5c59e5d257bcabd05269f3547ea80a63ef07f64c2c920016cb5f7a0252db +size 57014 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91bfcda0633bdf01013c7332277f36345926e6c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5995a02297aa217f01f40b0f0f348b3c0dc83e9eb3b643964fa3c7824ff909f3 +size 58489 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d68f4164a97d466faeb2447f4a4de967b6d7fdb9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2476af8187796c6e49e4b78bc818b504deb2fb273582de1464dc4338f5d0fbf +size 42841 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ad1fad8bff1a17a4d8756fc43edccb16eb49c8a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b32018b390d97608c9a3e0d695dac7f6d1c780077fc00b323848bf074f784c9f +size 143853 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0312ff61cc12745cc32ade60ceb083e55600bcfb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ec0106fd3c79c33325c9593af32921e3291a56cd95c808ead4bf098f57cdf9 +size 71171 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9d00efb0859e49c129198edd611126e5a514ba1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003b9e0cb9fcc3548681d7c5f44756fbe33dac116b64473e86600f8fd8ac1f20 +size 162021 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da3b83da5ba644d8d686d204cabfbd857a93e13b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73d19e30b60a02763c9e209bb199e2c4e56109feced913730462e54d358b75ef +size 209633 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0557b67d524dd4364d8b88c1cc97cfb2799426dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc28f40f7d6d9d91a557bf37120c0cd8b0d3cd9ab1d6fe0f24081e839410530 +size 49868 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a13ad72b410ce122f9615502273635fbe41d8dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce6a741d577bb2cad78e2b329a545d9a1999271d95afb7e335ba1f66093ef398 +size 31550 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6baa420b0612e985a316383a02c7432bbed39088 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9764f7b644cf6d082735b279000bcab901df0b2d40a8c86a7808b03838f3ce20 +size 36095 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4435b25f8504f105a45a7a8d0e70ac6d0e4590f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9831326da5c71f944fb43ca34b527c3cb25aba4e000b57109f21c37207de8a3 +size 29461 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8572d9757bbd3bfeb72c136679d18cf6f423a237 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19cdf794b059c4087c0de49523586744c91a4a336d22b9dfe597cee3d8667fe8 +size 40342 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7fc825fea287367cdcbdca60d50c4e70a2012e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d200173bf8226750d3779fc8e0e2a291116495d71ea6aa6d6cedf3028593f320 +size 26991 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a6b9e7848a4ac8a93c541aea4c47f8334ee57b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0053cb3325a4f65512ef553ea3a3718d56893bb638338f65fc7694273217277c +size 21695 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d7137de68b9f95e6410a3bb500b53a7f4856857 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74dbb9d09f4b42272c5ce49037ad490d4438c3a9c1c3066dadeb50ee7e1b06f1 +size 57411 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16fba31588cc95c08d572ad69091d6d2424fd94e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d5a2c9137c85e57269d772347b19637102121e601b1d7e21d5276ae021e774 +size 22271 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5aec1da622892799c5652cbb9305cea32d95a9f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b137d345d9dbf6799e95980dfc63d16688d8d78f8d4b00337d6b154050cff13 +size 169165 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3a981e4dd9ffce50581ff8e69d737097301bd22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a62a18878c7cde66fc8fda22fa2cefd08086a31cc1224fb32daa14b8ca4fd8c +size 90226 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0bb24bd70a7deaa3e5e7ca6fd34afa2fb06a6a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115c63224165022a2c14bc07c41178f0242e1df73f3ebeda013be8d4deea9a7f +size 153383 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cfc9e925db5fbe2708ae3b54acec7f585decea3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e85087cdd621ca52785a8bc672e35443be38ad927b8091307daaa3f408798d82 +size 79416 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ea485581c416507aba29a02daad94a5661a817b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0421e8c5dfb53cf9f5978fdb4e2d81963f203561c7a470e30a2670a47aa66511 +size 74327 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df164c382db14bc4688c52560ac5979fe5d98c63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e215003a964202fa6158a2f610c25f6843d1b642404e4f20179258266e4de9e +size 82572 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69743e6a75906831490a5c5fce88aee996fd0372 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95258ca536962028608760fb3f5ff99ee749753a3163bbdc052d76dcc9296e7c +size 89858 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a139008f1386cd51cb3545aa51bb2a5dcbf8a750 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c491f1b438796b4ee40153649563aa2311f3f531bd19b9043fb998e2cc80e6 +size 1004827 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2e7895b45253bd3cc97544f27ba999dd96b8b27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608dfd20dd5493d5c5a778f6262f5fceccf78de8cf3becfdecacd73f0f2cf55a +size 132933 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67056dbec1a1cf879422d0f02c70e5a19a69abf6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71ab4000d3c25d8309434378bcb970937b9dfbe46f6d6f4cea2b9c519ad7385 +size 183131 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1bf533e742bf25e1c5a4f2ed60d9fef53403328 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c69158672d8a925984a07fcb5d5aec99d265eeb85b2efae68096d5651d7ae2b4 +size 27527 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f3ae3da8aa1a68d477a4047012aa7bd340df4b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251bed74c7d87dd13b84679713bd269cb847ff3afd21b9d471c3cd37047af286 +size 120466 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0edd019bfcdc1dd4e54a78452a02ffc0c6ced462 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3d17cff49a486e6c9794b97ba3e3b86f77a1b2ee1661d3e610fbc4756749da +size 58005 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..105ccf4af5f9d4d39efc8b339acced4bfade6144 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e1d8411e65696ee53fc137cd4a1fc99447ce1e70cfa522f91f5cc7704b2a74 +size 25423 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e20f3715fc4f678e2f4a888735940065ab4ad7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f953908c61fa9adba6621c8834945303fd977ad2d726435857ea0e5684eb6be2 +size 39465 diff --git a/eval-results/mmlu/0/ckpt_237/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_237/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..129ae1ea779687168a66fb907bc75c8465d4c9a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e6f39a0b57894bff1f9c5b304ffc6f5bc15795ab01970645e2594e25412075 +size 32912 diff --git a/eval-results/mmlu/0/ckpt_237/results.json.tar.gz b/eval-results/mmlu/0/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56671b4ba10579e43c1c90e67d375442e4170849 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6a1b4e326eabc4112d91c9e9fc449434d2b4a301dad121360443c65cccac02 +size 7594 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22a0419728bbec43af2083468caf0c3f200fa648 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3f4d3ed87fc08df8800273bcafe06940aab91b7f2eb5d5628b901301689e14 +size 17019 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44edbd3331fd9802a46056a43c574f032cc09696 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03fe7dee09dffd5176dd48e4ef55cadb5774ca0711346040fbfa20d1a95b5870 +size 29806 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00c72aa96b19e92cc27aaff93be11a05b42ffd4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a989ce7d0bf5f3e10754c71b7d61d5c61c8faa4f99904ee40f130289e8e193d +size 39846 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6adc054a3a01e9dc0ee4862d6974d2ba4b37c5b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6cbe4fbe8fce57fcd3b2308cde214316274cb95c85bb4768e19e16489127318 +size 26763 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdcfa6098af8017fc898c94c13b0a8ce5ab1626a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbfaa96205d7fd6b57f836c313d55bbb51a7f54c3c48ef3a5da4bf86d2af3e53 +size 61240 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6fe195dbaf248bfef034be4ac5f2c7ce9fccf51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c456804b59b8afc942934c6044a88e4cd5250c144ef0820333003530bc602f +size 40361 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a458b74e48a0b3d91bd6ec9963e042b69d5a502 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4dec77365965428ba026d55c24af036a5d3cfbdc0f87a1dc723ec069a863477 +size 23789 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9288fd08e97e2c40b6692460e5fe5465d12f9029 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d574805d5249014b2c947cca2a96f3a68acdbcd5687394bbffe95da45cff1e16 +size 31026 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41d6534905ba142bf7906ca42ac01809c3e94523 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d22eb225528994d351434da1d9a4a94b17fff4ffa1cac733053a3329421283 +size 22919 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c536f65bce0ba5f297d79797f4c3d9a5c2463f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41cb0c739def0efea138ded90b3d0df549bc5c7ca4a184d1885e232182e78298 +size 60962 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8536c5e320f4948a1ee4616310414c212ba7fd76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a6bbcd85b18c831b608fa62d5882894d86462dd1e3d09d8659b91304e32f3c +size 25731 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0583ef24fb897e51a1083b130a9620ec0bdd0e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:609f43142ebd8df91638c0e42e7993c4ccdf7959ef80263adf4840b186b8524b +size 25755 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd2b30e81e08bd2d0619fe8be73ec330de9e8b36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14ac538422b850a4c8942dacc309ca0f00d4ea81d8e2add45b38b0b06358730 +size 46496 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a60daf7e73258f3c439f5e13a6f3a0acd736ed00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9783614d54565f417a108298590939e9cfc231e8a824d77a775333f998bbf48 +size 31472 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1217cfcdad56a2dd38d22ff59fbbb6b50ea03f22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:783400b375889e2ea001c3b0bf10e6c870b064fad0f335a35f45b7f293e72d59 +size 28786 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30792c3e3d64bc22c623ff45348180b0c247aa2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bbc9a952739546f06eac4fa3eb113982052e9fdbc2d2e107d338825f2625174 +size 74451 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f64b3caa5f7abcdd8e75933b18e1d9cf874b86ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4ab8d14283056e360aae0131b62be33fd5f78f5df134064555ab41e2f2a3ce +size 30063 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2757060f4670b84476f5f11a9013b505ff01c5c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b34c996428a232b022d10582181e73a31d84bd8b6fa35ebabf1281e42329e1 +size 19105 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6918f02657602fb1043646d6045c6f8bdad9bcb4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ab64637436bfcc8e4034fc293eb6b1ad5a34d96d0b2d9b967ab56595327184 +size 87906 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f08a096f30d17a00c0d57c44f43899cfb740e0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb695a86025bfd8cf3c51e52e5494b67f09e042f346df6e05052b7e480da148 +size 49971 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2194004b10252f5c33e8ad0a09a2dad85bd8a70e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e3598e41f907a835b32f50b93c58510858bee3fcba4e02bb81a669bed8cca3 +size 31573 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdc4bdc557c8cea3d2b4901bdeebffea865c76df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9657851b3504259e873cdf30dd59b6114b9d2106680a2e824e6ef65203f263 +size 145334 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..335fb3e2c14c02c44ea9ea59210d21753068b5a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5dbf937f473ac2de63194d557ec94992b3d26e19eacfae6e4473950503c334a +size 44564 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8848ed6c48f1269799bc04d27dc45c4d3519289a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c70fb3043dab573f1cc5d97231b0dea0f5811f1507ce448fb826166e41f4bdc +size 54450 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e6977555c423d2fa9c740f70f403e596be83cdb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b98293e1c17e33f044c4897b4df0bdf5d1d6738e22e6c4cdbc9665b7717f7bc0 +size 92720 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d98602542ffec983093eb7384de72d0c7b9a8cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c491866ffaa41d11fd311edd0d40aec48f331539e61acbfe1dd937f5ac3662 +size 56976 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8df7579339779ed4be34e03f35a2735925af4ef3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef6a1de3c5818557fd02e743407bd4f8a1fccc1616a11b6b0c49a0bc63612d7 +size 58463 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66bcb2a40c0d81c759fde502ff90eff3aff02400 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67582bea57380cbadd87f8030923838326d3dc3b82c2bb7f4d0f78e3958d3c2 +size 42784 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3de0c8f6d49c5699e233d6b3bb25ba9346e3330d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3b790dd547024ac37d48fe86fd72a2c7ccdb877ae0a0c8a573a963b7f3cd1e +size 143835 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd0ab1c87b91db808a5693256ddba553f7f8ea70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee130f2aeca5c5e221c41a71f2fc647a1dc404684d67f41b895360899de33250 +size 71095 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb586913d58a3ed3dac9ab501eaf6019db263517 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4b96443fcfdfcd8d7f7c624a3c2d37a8dd7457ab6f602823472d5d779d0da1 +size 162048 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfbf4f7db2f40c1d7f8577e495286827c4361c5b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f477c5b3cb0d2c182ea7121e94c09385fd59b2a42129c9b7ff7f1c2b256b2497 +size 209314 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56ffe0963fa20fed501beb4d036f1ad6bb85a4fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6401a65b922d78c786cb49f8e3aa7f657d59e555c853e8c5c3e7659bab7fca8e +size 49885 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e463c840aa6cd90a8c7e4bcdf01ccabd4fa51928 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14bd0e2c248c8f856b0b1325c60cfa8d1e3ced5e0be69e789927e7d392377918 +size 31505 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97a0d05c10fb4596150b379ec5f8365d7cda82d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc563bc53d18f0074fc6d17eaa5f3a21e42c2e69f0bfa78b6c6f8a3ac5f9020 +size 36054 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b18ab352e4441c050e22eca99e75f22ca44dffd1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78771328f6911d8478e14e2b4a7172fda2f60f4363828f7fb471375fd2eda21e +size 29491 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca6a6355399fb1236d4d9a914b455c606836a1f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ee52062bced8453ec903e1043b568d6c0f1d6abee2bd71e519202657719604 +size 40316 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac722e2ef7e493b4b80e801704407ba1178262cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:701b1b2849e772db32331d9691e3846c232ddd7b3f2bbe86b2ec15da5a839cb1 +size 27008 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d51ba168c070f08ec3f7de8d882a5f87d7d6972c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9961a5232c481f802b388ffd652e0824d4d53c457cf86d591ab62ff73ce1e8f5 +size 21706 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d8f96c1fecb26a8f2e21b611452edd84a4e42a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da37f6719150910f859be44314594a47089bba319de83655c34d1fbe7a5d288 +size 57368 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14c28758ed50850562f759c67e601cc16b3ae088 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab76552f422eb760a450dba753f09f15e1c8a5c8ed69b503a79c09b1129209f +size 22265 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b54518296ce8f4b41b4e5cee2b48ce76825e8a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af8990882f8f2f18aa526f74b9bf78aad733594a9b1a3885d6dfd4abb2bf002 +size 169101 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dde35e77f3599180e59f94d266d2385716024654 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7cffa47f6bb8004fa09edadb808a5832256213f6ea1993441661cc907aafc0 +size 90179 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..114686e4e393b41ebd8f27ac8c3d3f2d1a6f3312 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f628bc5b4dcc8d3e4bbb56d6424591aff979e45860b67fa6fafea7cfb661658d +size 154351 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f275a48e3a683974b46a950672ef90fde3f32f35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fab1ce966966e4813d00f4cf98e54dcc20fc8c85ea202f0b7e4189de2d8c2f3 +size 79397 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ac4f26c9c619e64b6771f47d57b895f60242b3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bb837dc06ece04f3f375d41ff4f4d5821293ede8b4b41e040ac265a3b83aeb +size 74378 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2800ba637f4b41e9cf8fa69d6462e782e92dcb5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7090168ac98006a74e55dcac4817a9a9269e5d3cb1be3f2570b45d59a0ef97d6 +size 82517 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be11f6c0635fcaad0f95af25cf7c5d795fcc8a20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51bd78eb0561228c4cbf09741fe4125ea36af68b41032a056e93fc49ba82f880 +size 89803 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1830362277c13438a0dffb1dd5034cf4ec5cbcd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:332a8762f569e140f22e2765a9a1559e3d79b37ef1cf6948c97bf64f79f13d88 +size 1003676 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4f62b02bb4271f60a784e5e0252548a73d14b05 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1560624ead6fe69a150d441c10475f394f6ecf6ce0f03a24ad019e844f955054 +size 132933 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e7d97f0ed2d05524843864cb8763631e1cc103e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef49088619f90d2f2c9e7ff4ca9fd2c74a512d5c20d43c1da1208e8f8bc70158 +size 182920 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5e0f2b8d2c77ab59b9ac3ffda48199650a4c763 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be4e49ff098368086b8d5b5312c6bc3cd927cd850a1e6ffb356eb0a91f2cce26 +size 27481 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c9e97ac77a77a0c4a88f34dddcf283c518f838d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950b4fb052af18c81d045953ae675eb2c83ad624664e347ccfaf975d9ef6cb68 +size 120431 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c7f23ba579dca8540759f01df7c033f88c9ca26 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d585b43263c5843a0c4ecc34c6511729d9b6415802dcaf66744d7cbfecc3e416 +size 58011 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03f866db4225a4ca775e1afca39002062545f048 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9927cd3598942377273dbbb69dbb92516383fa2875eae0860bffec7074380ee +size 25426 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1b402cbcbd2d2ec219051e178562a3b7519248d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ac5c293420fcb73bacf05b79a63d17fd59ecdcb4b05538c07cd4ea83364850 +size 39431 diff --git a/eval-results/mmlu/0/ckpt_240/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_240/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb3fa9352aeb0e52d87754c3946c8ab578ee4b2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb9fcdb1ad6f61c4bdc7cb031aeac72357676e3c64fe2bccab90d40dc9d0eac +size 32900 diff --git a/eval-results/mmlu/0/ckpt_240/results.json.tar.gz b/eval-results/mmlu/0/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24dd135e14bb96cff300145cd1b7d94543741bf6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2e164d782777df3cc320e6da1b6ddbfc98a38f819e4eced6f764d484d005b6b +size 7655 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32b8aee990a39ebbe56f9f7a7986e8d1fefd0231 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cd85f7d1d69dc7d6593a98dfa75136510fb64ba79d224f41ee499ec8f8a3c0 +size 17023 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a56cc92369f4878bc39ae4c3790bda22cfa3bca8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60233fafe9d471c6f3a4e22f296aad7981921d66d3e0592aeb830ac9e221d65f +size 29800 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10fe80c23f1627c6dacb040a8e50b2c8194c2f97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d115da090afc9d9bfe35db7d61e7f11861838dddab1b6074644a419259dd8efa +size 39845 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf8452415a8bb03dfeac4e94924666f824d643da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287d2fff40a3146469f8482d9b5c00e79ffcfd4e44ec7e919e95061d2dc05e2e +size 26759 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..661c67a14d6f271b63c8078dee9f0ed293c1ad1e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9955ec2f9d28ef62d9298fbd6eb754c4d1731c341727c8171ce89eaee2e8ee69 +size 61309 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e72cbc9e69243aeb5e622f29c52b953636b6be74 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2e72895844202c74517b89096c5d68c9ba187f35681f9a5e838ce161f2871fa +size 40378 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de3cee5fb8ccd78c3adec47c15d2d57a0b2bb870 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4b102a435ed60babd94c572b79e048144b361b0ee5aa369a5ec3ef9a8819e8 +size 23789 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28277c1cf5e8fb8d5ca5931fafc0794e510af5d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6d2a977d88b742a26c4f38a306c7e7357794db915e3c13bd708d7268f47e475 +size 31069 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3657a0729c609bc15cffc0f97353557cd6b2a20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef66260c9bf1e39f3b70fd04dd88f6f6fc95a6f029f49d70c97a355bdf2d676d +size 22912 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4edf5efd10aeb043ed47c4729f1b1741a66a414 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dc9da11a902ee79cf82c5438328a5c1981afca6573018f097091834c3671b41 +size 60930 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22060d80a30b01b66228a5ffa1046356224ea34f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2393b4fdaf2e89525f3e4e91a83563d15d1d3530a5b6281232c1db48c5c6fabc +size 25715 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a999e68f6dedf8af5781d7f3e76c2717b1c80d04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbeaf8335229123f0c1df0ac24cf552707792237672ad9842e3aea2ca75ac006 +size 25789 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c8261c7c8762e99c38b67b27951c25efbe35735 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492cc80f7852051f36176d13f3141a058d387a07f05460275c7a2360c6a14017 +size 46527 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f28153b14a861b3af30d04c8f3a76a20ad9b6dde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a020a5b70c128cd21053e9fc313563ba26679946223487a66ac526571aa8b22b +size 31469 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7c11f8cb568acdfde8064c5daa7a1cc89956f28 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d385b72188975323dc12c6f3ff3b90109a56ff35ba85cf03036c553612b5fbad +size 28786 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da59f497dcb68c488b3c8b1a1f36322b53fc57bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e77a96301c11459512e030a1d219a8fa15e62e985c2784bd7ed72f9a2f4f5d +size 74569 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3dd42915a99510e36c00aa41620afa4e2505d4f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1004bc120ff06573fc3a72bd3c11a04808c505c20a696583c9405748e33f2416 +size 30080 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20ddcb86fc79969484e50524abae3a4f9291e5f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9800180b9d4fcd8298e5d234296274afc45ff9468b3f9e4c4ecb6368873c52d6 +size 19126 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7dd2a095a65d5a7866bdd32cb77f997d47c09773 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9d41fb95f3c0c1b8f56c1bbcee88cd3cf0ac2d914c79bd22cc52a7f28be4aa +size 87990 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de11fc46e4b501bf4e3133275257075159feecca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90daf6a1c5fd4505c8d8149d1a189ef77a536cf26142fa13389e3f153dd8b966 +size 49982 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..957cb173547c2f79b1ac0ff2deae4ccbd8c9d2c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4abe19f6485a7f19b086e5883ac7964c999354a84f3a40f43244345097a5cb +size 31622 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf663494b8a643aea6719afdb57dc55fe615efb9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95780d2ed71d98cade967eb3bcccaf0ffdda4c3bf44517e5d4aa145928ce6e1 +size 145465 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27a59d550b8c7508fce3ed7b0f4084c4391711a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505d35a71e3f12b93e86f4b6c7d5a53501cedd8ed9f61bcba3dd1238b6c49050 +size 44518 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bfbce0debe60c3d525a0a72af642c564f9b0f64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:718b5d324f84baf1c3a3c2adbf8691ea7df7473d828fef7e7593f54787cb2fbe +size 54400 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4663ee0a263a3c7261897f5f821e695c5f1b913 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f09b9ba498abc604cbee4fa665ff710191824426c587d25e5da7207ed2f27a7 +size 92789 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..999a9f8f8db94cf7e77b9a9204bb6cf4ff418189 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71e4290bf3da7e5e0f315277eed13f7775fec25905ec6a468f7bf0b31b7ebce +size 57007 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99b9dc03f87c3a8990120a34c5fc087f8754e4c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f9b1df3f06e07c86d76604d926cfdfe2526e12bd533b5fdbb92a603e8e4adf +size 58456 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65bc0c457d69df3969bf853db54aef4a5c6a9563 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b970674fe1008c20ec15e15d782986d7615becd733627ebf84b35b7f5438d2 +size 42859 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f32d33f20a2ad9971214c6c4560c78ba1b12aae7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1085cfea47d9e4ab3bc3fcab28b04dba565ab1620d2fba0cee75aef17b963b64 +size 143824 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d6480e2f000b82f7e389ac44c3a05213daf0d06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32530a51271433bbd345960d8bcc28cfc7579a1cbf883afb62ad76f9c0837346 +size 71137 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ca2289ef20cf4316a1d357ed8981d4612ab1454 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72db8f96e335455d51cb9f4e4537c15f87e44738483887b56e83b5cdff21fb48 +size 161948 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74420d3d5829bc1c2819a9223f72e83d67c6c631 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e4b18e8db5a31b781f19514c43680cc733d07390f4d3dad70aab5c031c772c4 +size 209533 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5efa94886dd111139b5e088145349bfda895639 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7ab4ddef461a0018ef62730571b9deb05602c6831d69bfd683bfafb2a115555 +size 49955 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c5cec2782d98f51574998c8266eb39db0c08e50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:121aadaf10f840bb0fa37340ae5325aadf2324d7c1589c52326fa998a8dd3bf5 +size 31551 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e9b2feab65234a784999721e1d9a1dfb0d576ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de628f5a9ed730c7a3c7900d838d4128c2fafa605289858a6b52fc418e6cd524 +size 36085 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33819a0b17d3e0b51275ae639bc616cfee9530c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c3c244c2da7f41dbe508c6d09c600d8b92e638154446e112d9772570fcf71f +size 29476 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0935b7d8e117c5143635f8d564acc25ce57f337a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8545213911b643e65babae4249a3c8c6a1865146ddd6213ef0df29f29b0f01d0 +size 40287 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74fa0eb3febfccc893bd8e165c589d398d6c9510 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ea22eef935a3a0493246cd5a09e71f7bb57ff82fbdc0c900f25dbf3ec25935 +size 27004 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b28c8b830f25ec82b14e510991f1d288157e99d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5790b714fde87306f2e008ba1d143d800646fa2c55354f4e595ecf551b1d355d +size 21669 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fd9baa7d7153f81598de2f919075d0c272184bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f25a971092a1719ed16f3e4e14d997439556586afb4ee3dd63d52144e601178 +size 57393 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99f7273e3ce4572d146ff9b6b1ec4edf70cfb5e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57b5a8e41bd14caf6a3a5011c9d10ba40ed602abc6a94dcb9e87467cb4ae8adb +size 22283 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29d5eac5c8ae4f8a593b79a9cee75869d6eb59af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d2265c6a440001ba7f23ec894a2da553a046cb1adb0395f13f20198d677064 +size 169091 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ce53e886bab725b740077fae03d2f3ed82841bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aebff618908e9d7db33e5c026c92dfb68909fd1bd728ae666aa2084ee2505d05 +size 90231 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56e701971730674a5b9c7bf59c479f702433a6dc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4d40393c219798f6bbf1d3ead04a87c62f1d593663306c6187647acb84d233 +size 153812 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86fcb772871cd2a644a7c54992fb1af15f527dbc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd3c0da1e595fceb46799fba113ad4675299bd7b095f0843b7785c8fca00b26 +size 79428 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb9f7f4cd6a3048a5b678744561acdbddf37a692 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03328c47b13ab27a4016bac8ad5dfd9d90a2df4970e62e347d3749c92793ec38 +size 74379 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10739352a2dd17e5545777f07c51b0298670c19e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b6dafaf2ec8a7ee44a62c6c447e6eb247630a5f1113ed6daa94790ffebf5c5 +size 82560 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14db15af2f5f644234016479e95512f58b836964 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42481f4c7f237a2eb82687f3a9f9066b6a2ddd4a859f1e1dba8ff14c28b7ddcc +size 89879 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c6048a33f86b84b655b39956518b8f4ef021581 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e7a939dd8907f2769ae26c9910beac22db95ce34b39a1dcff94c89bd7609f35 +size 1004189 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26d7adb9e41cced2f8995b6be66ce450de783d0e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b7ee78b8ff270f6217568bb01a876b0df2d016314551d817e96b655fb1ccc7 +size 132942 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f4a827b73afb7e268dbdeb671aa4fd28fc3b0e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c13515838250c7f9e642fa0fde819d353a3b0e18b4a486f0a77b37199c3dd7a +size 183046 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..170d4a90bfe57ec20ce18bd8fecc0f1549d5519f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd1e35e78402cd35bc7d27930b22f3c4454903c2d738730a27096dedf21a65b +size 27532 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39b4e24d1eda157ebcfc6442bfcac9ec5ba8b3d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d72b2be8315803fa9ea25c59736eb59a3ed001259b1d643df550bb79497c8d9 +size 120441 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e010ef6a2a96da03ea99a4e38c202682317d59d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00427197ebb1ae8651fac514b47f8a495ddfb6b4d4b06de3c603319d426ffd7e +size 57962 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0cc7444b7a4d6da86b40870a7afed6bb909226d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67149cb120c9156d15d99b2ecd49ecfc74f725622db647843612855edbd6ebbf +size 25437 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fac68c0dc5b3f037f0fed29e9afaf3d23021d8c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd9fe861ca8d3e3b78fa9febadcebb0f42f58a198051b13fababe8d91ee77f32 +size 39452 diff --git a/eval-results/mmlu/0/ckpt_243/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_243/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01b75a69b69e927ab4edf88ea494f70ec1d317ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce340b88fffed9e7a6f9139bc32eb2e87bf48d76c0361cf72e88f4ba39e066f +size 32915 diff --git a/eval-results/mmlu/0/ckpt_243/results.json.tar.gz b/eval-results/mmlu/0/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37dca453694d7e2dadc87444a841b5b7b796305e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b25556180f5f4fdfc613eb2bfb2d2fb673d12fb8cbaccfc09f86d9e9440363 +size 7623 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e645105f06099970dcc6d6849d7c6f265c05db2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbb9e6b49a615562485b4c2989aae504d6f063b10735eddf6c8af7efb42f0355 +size 17015 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..711cb1934576426c79c2541ee662a24501748968 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0edb17576c9e8793467e5d9378ba54c005ff929c74c9ce10655d1461b296669c +size 29852 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7b0468fefd959e8d43ff1045513c7dc4d6430bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb0d62d132ae38e03132bc3c4ef7f88f56b73d2272e7367b506d21b780f5071 +size 39845 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b868b602e0157f38138bff986889ae4ba5d2714b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:234afb0ea14c6ff251881bc9b9a85c4086603f9d12aedf72852f50ece9f4806c +size 26763 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..719615b106b39d1d350ce5e3f8f4a21acb6091db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9397ba7cac913b94401d5ee6726f3ed11c58bd53273839a44ec167d6ddf9b9db +size 61279 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..370b740105217aff86be056979e4cfa495147df7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6182f530a1fe9758a4609154f026507b02edf821a27ef2fadbaa8a2768aaa2a +size 40376 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eb8dc61fcd1543b20bd4fe6adad21cb9d461890 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c18d40f59179205c1c086d65aff6faf109e8614b8cdfde02feadba184cc00f1 +size 23817 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e861e32f5d8832bf125797e9d6412bec39475a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffec2e102fdcfa08bffe3efbe2b0651d903c77bf686d717a2a5aa034a01e76a5 +size 31088 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..845d46602f0b908f32b557958a988e5f6575951b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d21054ad4e0393be8dccd374438567f705fa78a980b505c1b2117cf6c37467 +size 22937 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23c346fac3a38f2cebfb74ad2242a1f9e1ce00ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d960bd47a5d7e129d4ed986cfce4c028eb52c2bf13d23df400f2d802c22a130f +size 60952 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14859e177942a1cad86561835c62b53769e9ddaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b49551513aa8c87133d9ad49cd9fcfa991b03a24e08f312796404020020528a4 +size 25745 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f06752302a8c67f084a409c92e0dfeeca01987f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f3f3046581ec578891ae13b29aa773a20da9c59af52b35c75b004247893fcd +size 25774 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ef55aa1efa8605b3e5af08cb74fa3cc4cb05f59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:486bbd340d98a5f14fe2697e441e2c3d58828f2b8e8aa2fa5a37c4e0645ef7e1 +size 46547 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f55eea01e69ea77696ddffa574a9467ec314eb06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aece11626b241128721713932db473cc628d0309dca41dc36e48f77fd5eff5b +size 31473 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0141a6d0beb5237d9b5b2ae8a0b2fe33113b72d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:782e1b37485670ba9dfefa80efc7696427f7ea5ecae49f6e78836efcb98d5fe1 +size 28795 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66349b72d7d86ee13dfc93ed5b7cc31b576f9436 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68324d019ced0ecdb39a57e93e5fca298acffbd42d23603c7a84568712e7581e +size 74638 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1e6401a3e288bc4bbd8c5692923ab5ad179544a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d451365cdbe2f6f843d944304b92fb833f01c56388a2239f8886b21bfdc2d6c +size 30081 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6a4a8316b318dd785bb873d8322c958ea249276 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492d754ff0da1b975f2e00cfd2476ad718833fda3f29ee76bec9dd1d059c429d +size 19150 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b15e6d8a14aed461c77dba257450c71ab6cd3ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1315782e2cd8a1888972f84d08b7238f616cd55bbe54485a5a2bdf2aa8d5e846 +size 88056 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ceb069a265a813e5c615d741b7a8923a899e3fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86e7439e01a73995e2e6b1ceb834f90c3eef50063a1d96486ff29604de263492 +size 49970 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41ee81d88a9fc9b5f0c7b7a2413ba2d09cd370d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e7b294bf8467c80d1f2b836993e88dfbd66f746cc724eabdf929efd1d02b669 +size 31625 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b7c7cce9682eaa90a37e89edc7c30b318790e41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc5725fde97197f09546febbf2a2196bf4e18245016ada3145c4a10f239ea584 +size 145428 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bac9de2e6b6035f3f2440abb1ba1892d7550c62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:486353e8ae6b99ebfdd8481e5cbaf3d413cc200402bdc8d4d5fe079ebd948180 +size 44598 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b4e8b9e13d17fc87a1ea35cc88ccccc6b954a43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ed5f6cfdbd897ce5028c0fad7813333fc3bc30b41b50b361cc48fca942bf2a +size 54430 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01c3a8620c87a96057e15226386451d942c778bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67161492b51b9b6771055e5c46dbfb7e16ca18ea7ae87f0b3f8e948db623a310 +size 92782 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0927e0d8874d0a98fa721b6500098edd0ff296b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc107284ffe1ebc56a041825485c9b5eccbcea02d6e5a8808d41e93ec607838 +size 56972 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d4849a33deedad0e6213181623a89f9b0c9c0fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfdbb2344bd5b43d433a94cd6173952e4a777d63088104ad0c72eede492a8a9d +size 58486 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..145f7213e123375a66b858c9eff6c6271a0d010d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25fa0acf06580fd4fdce6c8be3183a4c4aea86bd46940af8162c74bc7377e560 +size 42870 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90f1e0106cd5678f93c52cd0dee2bc9647e31730 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5168fac9d0a14211771ee78d444b0b9ee67580ef2c8817ae6f924190cc679325 +size 143844 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f786589b35d245e1d76139b853875ccbc0a9f020 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e24b802b58d43fecea1e3671658038c827d6a29ec5a666840533f89308618ecb +size 71200 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08d315a4912e7531e1f2f10dbccb199929dee6ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd98ebc5ebde00aa600e1fe0daff52405ff4aaf9a412aba32e3a3e48ca74bc9 +size 161947 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c5d1a9578228c154f3224a48585e921aedce75c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f995dcc1cce18d566ee98c8c57e4b6b3a82b28b3a365eb169ab4265bd9a860 +size 209565 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3d80081e0e64214f77581a00fa40afa6b19a997 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85af15300bb91f87b21d3f28456682d2843c6d40b78e2bee5c14ef9d9334ba6b +size 49858 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a863be5a0b0b9085c7183e12b199e0f67504cc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca680298d7bdd768d87f90f91480a93ad78ced101d29a1ede4c5462edf3093df +size 31567 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a2eac37767025accfe7faf49900384bbc35e389 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da022a4ddbbfe2ff977be8c22290ed65956a5ea0b69b019e3fcf0f4c0b66d8c +size 36082 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40036411f91272f0a5edd5d28829f613651c8a01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef33339e50ab198aeb631c83c26acaa05e612552c31aea55e499f2ae9fd0309 +size 29476 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3e50ddcf462aa68edb42c93a2e111a54d0c7dd4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98405dedebcfd88c799d0a0bde6698ee86cb2cafeed031bf48c91fb3839ecae +size 40400 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..084eb2f05cec02cb89fae1403e71cc892ee8bc5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a7ae0a0631de9c1499f6a936250917a509a8cb9e1a9a7acbf5930794e15bc1 +size 26982 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..170fc7914dbfcbf1a37836f6f798674374873154 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a55c510f4d44aad4eaaff31cf27c3f8e335d8f79b56f6185289c221a141efd19 +size 21710 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2570bf820d3f9c7aff1d34184411df244de08242 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16d38e3b41c185eae7839de2dbf8dc09dd161855f2c6234658080cfe9f46935 +size 57404 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a3ecd44f88afee4b36ff16a3b610b6205109962 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321d9167e90c1544827ab53099c4ea22d0186c5b86f5b25c6803f86aff7c37fd +size 22292 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0aae6c565abc1eea722f2a5e8bdc9129a44a6e0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2b38c7846070cd85ef0b5e0188f995fde4f97fd4f6f79041cc8d6070c0d782 +size 169217 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b10d1fdde1a46f306eb5fd781182fd56b3f43683 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45625ab076ec669f209fbe0411a8e5194a13a9f9f42af6da3509e5aa7bfe2702 +size 90274 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8fa85cc6ad06f9f2c921b6ae8ecc564aeb78f20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:901262447c2074988ce6610ec6d70ae3e17e4f5b8f14c0a6c100357f4877627d +size 153554 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cbfdc5ff32bf718e8fdcfc0cdc4344d8905d3ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa6f404ecc9d169cc6f356892b9acc001d0b2ba29a5c21683f0255e955449c6 +size 79416 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4302cea43f01ef64d25b5dd53be0480c2af007c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69124bddefc28661295e4e988ab52bf0d6ed198702fc992bf4ccfd14c626f642 +size 74340 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fd1ce55a1c40aa3ad02d91d8d760e2df9bad1fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3883a917896679c0add87ea21bad4238209d80fb947004bdcfb99ecc25d0a40f +size 82485 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69404ffee59d2461b2b5691bb2fa62ef2e8f3621 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b98e21d2ca73372e590eb89df45c952f7c9496fc5490294ef4ce8ba4dfdb75bd +size 89941 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fa4359090bff3d55c7fe81161ec5cb31dabc5ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01329f5b7277b7613fd79414b9eac508982157405c252c4e887b465d4e3d8f83 +size 1004458 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e037c6e4b9ffc7cced01be4dc0576bb5fc7c6ed3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc6187c3def1f105bca82c78b1ccba8a31454de931111d32945de3a058cbe64 +size 133056 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c22e70d9bd6930b569b1630d198141684431f723 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b2c68198de6e064fd2434ebc02603afe66df6ae809bf504089028d20070f789 +size 183054 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02d4174a0e45bd7339366a5133c11cd21f416aa8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9508923eef64eca3f30cfe1a1d457d575cc1142dc93c1717b152de704330d103 +size 27555 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9472d501a0917b31c88f4db4f77cc1be1056311 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2917d8155f61adb77b0cf276f9e6e6cff4dcbd3f67502e46bf4831a426661e0 +size 120397 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1569808a99b5a4a745e44cf813b65783ba19838 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b1f6feacb00b27a64c1a3ad6df145436f2e2443667c32e6684f5527f4a2da8 +size 57940 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..326598a5f9d0fe1237d1be2fc3219212a9038303 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71fa37f344195b33ef650bdcbcdf7c90319b994e6f81e43734604cf6c5431e33 +size 25389 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5aa9abc5ba2877d4b4f97ea780c6306c3f7f2643 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b49eb02d5b684ba285d87cd92a96f4623a7ddb6234197eee15e9f2cfb61858 +size 39441 diff --git a/eval-results/mmlu/0/ckpt_246/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_246/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..025dea2b690cfee80a9e9dcc62a02860d348427c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c1b89ba3cc55d5571aad598ab6ddf8801b633cc0d7a82305ac91666cb059b8 +size 32935 diff --git a/eval-results/mmlu/0/ckpt_246/results.json.tar.gz b/eval-results/mmlu/0/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06fb2d65a4ee55d0526b6289fa15483539321507 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5316de654ec34b7d4eab7ffdfeb1a179568740b4262c84b477807370ea268d4e +size 7622 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcfcbec4194d7d1e5e2fad700618314de7fd1b2f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc9f9c8a43549ca8159ba789ae3a6e8baba0e971b991fb8ab9edfec1cd441113 +size 17028 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c28186131a690a59bb1caed071a1f01733d8ade --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfcb50768d58409c94a4cbd130553bd164f2ea937232ac8378099e5a560d2042 +size 29809 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..241c3f3c2edae2fdb539afbe2bad2048eb679781 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b4ecfad8848613cfdbf8b9425e345c87a879019f7b60e645d5d6dc79b523521 +size 39841 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8614a55f9d040c17681a0797d6c860007eb00bda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04216eea4f03f17824610b72b33a9cb8b38c5712b54c1c80d6a7fced40f15ffe +size 26762 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7850e28ae9cffba88597844f70855177c70d93e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48cbc511993f43e46415722b34d32754bbf4649ea156f2e091c51bed412a9eac +size 61248 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7407eec82600abf2cb13b5b969e566e99aba5bc1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a5d3228992c3f33b5b30eb1738617d91038e6dcb8240cc4fdb8ff1caa5f82d +size 40422 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15e640da54069f2e0aa7d487937185374765110a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8171f8c8a65056b90b4cc65b3af663b9793c1df7636579f09d9c97a96f862fe +size 23785 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6d9ad059012f7770247bb08ee93c427ff019708 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6dcf1bfd2fb8fa7e7a9a74c04d42c219c042344f3a6e0b35c4ada2e2a9d5ca1 +size 31093 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..700f2fc6dd502cff0db26a57fab332af5971d48a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af22b34ed80273c5f720771c59d7fd7663f870095ca4ab2ebaead46b753faff +size 22937 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c83b597b8fe53ec1fb83243929befd9bbb67c78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3ec7d3fbfbab86327a1ba8f2927541c0f3420711cdfdeea182a5de6db77f3e +size 60946 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a34099619fa2d910a52efcf38d440f29d8a43539 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d427c71c1bb0b995ea8c9787ad79def260cda342303f611ad6b8ecb2d97b534 +size 25719 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..997198bab7a89ae68097f14c435c714ebab1087d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecac2cefcbce259203141301899ca8d531f351a5840817673037d3a5d63cd167 +size 25795 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40456daa809bc01ed6dd0f401edf387d9cc1dc61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9fca1c1ca99a3d1f3b6c979bcca5901ecd2814d03f80167fe7b092d7feb62c2 +size 46537 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c641df32c0978ad2798552fc0d877f4a6bd4b0d5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b89787d4ac8dcbf0ad36c61dcfe70bf74e7755bd739e8afc8908afc4e9760c +size 31476 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6915e435a2070507ba7f42ef1fd167cd597785a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50c3a2224d343a49ac30f75bf743b1601d4484eb2bad45f483dfa3adbcaaad5 +size 28824 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64c3c1a0c523b2230ebce13520cd68234ad9b479 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91cf4d28136b1262dae3a47f00ec3067cecfb915127e52239b32edb83b3bdb84 +size 74502 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95ae51fc77453cd92b575553442a7e165d3c7c53 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dcc819919ea854ee6760a420b340b375381c8853a9fcaba03219235832dbf8a +size 30115 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b4d56f5464cc876539d67ba34f11c03e841f503 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a0225da628410ef8933ba294788a6503beef800001c6ef5ee617a43d58ce9c +size 19093 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10d5e8caaf2cc2d60c5fc0f448d7fde4f477548b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c914b99d5fc3743f546328e81f28eecacfc23aad87e98740d01ed59f51b9739 +size 88001 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e495f91e38733a0212bebfc3b67538ec18777b42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33770304198ed87be50c2ea22aa3d46b6a618f55f8eb2619799f3b340beec00c +size 50002 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbedd4da9331c27fd956d2f91d677ac2d4bcb4a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f06dea41625d05c74c7c05e49e08d97152e333f85a77316642bc2d8c201384 +size 31595 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ba409f0898580dd39384fb3576f647e21b05b26 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:740da40940731f952780320afc335b520742408d7e3bd246c89dfa9b93224803 +size 145450 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fd04104309d700dfc6c7b84968c52fa39a59645 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ea52e65eb8f8a39fbe4ef63a73b6e7874d75a4ad6f719936f208a496cdea1c +size 44600 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57797659dace890906ab6b0755bf1072ba5eced1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b338e4c053c8680bbfc64d6241cd5f6429622855c06cd0b355dd40396679c589 +size 54386 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d18ade6a4925f17d176f9c51501b36d5c228374 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9b661454ef75a98db5b3e8a3a92a75c61b6e7e0c18c59da3a2232a249ce41a1 +size 92793 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf5cce5cf6674086c89d3bd279225d7d4ed73e73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0746e87ea6431e907633f7d3fb34c8ab514ae18db80f40868cf5c814b8bbc0db +size 57029 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97c5a6b719cfe51c1a4e6ca74a6ae9aee0370d18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f10953baf1924bb5a5eaaaaff6822d602496d29eeffab8b1025a6584d64ffc +size 58486 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a36b737394b4b4593a8579c6a6305e95a6287286 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4614a88911593fc622d0011fdb62d11490de9a3b39f6a08347cf959039c47fb1 +size 42813 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f83cc40fd00aefbea44422a1d79450c0a2dc9c9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708a6b2ef1eaa305a78bed901faaca39cf27aed3237515bbb962017918f91d31 +size 143819 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc5882e5a7e9e41096e955df5992af9e85c1e982 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f10822725b6264a4dbc3354e92937fba965c4e4affc9c39815cd78185e418f +size 71198 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27601fe7563a2bb2805c933954f930a58545ac30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da4884ef4f1ccd91ba3f04f1c0a786333536a8a7801e53500defc6f1edf5c53 +size 162026 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c962a20a9d7a1eeee49c50a7688eab356dbfd3d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ca40dba5a4c747834fc6216a0aeb47decdeac45b8f77d5a0599592b423b0c0 +size 209508 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..744fc1e06142263858a6df46c1f18a5205495c18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f4f61700382b5f733ae573d3f915724d44401e791f190f485bcdde8e35af7f +size 49936 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc35b2eb3a25479a254a241ae7c96658d1fdb1c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca7cf2a8b2ca0a6c7eef8d484122707b9893faa78c52da9b993f9319080896b +size 31554 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5d94d64dc9510212db1634f44704fd61be4e7f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b6e12f6c3d126bd5aa45976ededbe4513ad321a6fa6fb0e5443ec4b6e0fcf4 +size 36073 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0d4676a04cdb597b7173ab8a0dab44c136252ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482f6ad2f29df938ba2e04e1045f00b2f9a0cd6e27641f1d56618b7ba0fa2576 +size 29495 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0bce96816da3c317a1d29ff1473c1a4dfd1648d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ffd8878d31a81a40eb8a297c30028e46001db760530e5cd3a84f54a7379bc96 +size 40343 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fe6c30eed336f4d2ef6cee505f0fb9fbe5d32e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da8526338ad70eb12fd867635650e587df11410f2b01911a0fe5c26dd46e6e5f +size 27018 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ec4ae2b7c5d2dd3ac851aa387011fc2259a08a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f118def5ae78251215a6ef87ff92c7f3f44468f745fc1b41e83718d70bbeb973 +size 21733 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df291d5db7c973ff04aa400b9f31cdb2210bffea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19f830c00f46699304ca9c1ab0e7204f210f6e829b9dfafbf47f706ba222e46d +size 57465 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8eb03d16d1dbbcb7cf475e3425c0574cdf93904 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f74088666e04111f348736b3f68dafde111f06cc9dca02e30206144bec68cce +size 22291 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dd04de5d01f4df456cb915c7a81613241867cde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa5be732dcb1d4ec7bcc8b70c908edee88c9472df790fb15214ae275feb04384 +size 169262 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b81b2eb6af49ae464e98a842dd019e7f1006f53e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e39cacb1de861818cd34ae5dd032b87b872bd6ed31d3bec0dc225af9acb8ee0 +size 90224 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1b1e5ee82113247cf218a9254a8134f31bf5d17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260024daa5c7314e671882759bcf3020942544af051937216fc4c0b9b1db4d2f +size 154200 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d1416b9a0a2867883340718fcb6b6f6d10602cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e9d90f30201de617e175f9417330385b571b90168513b7c24f0580363cdbe3 +size 79473 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f78929717533549f3779e32925cac7c5aaf7ba0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:732b3e18ecc4370ecbaedaf99f73560b567bb6dd8305e25d66fe89ed4d087c52 +size 74364 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5878bc6a4b76bb3faa8980348234b1b90c3184d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abaa9f718e4e5e1db1cba9e14c500517087d1a0a83fbcb0d0df12a8a3ea8f209 +size 82572 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be55cf0a6d8a37556f4e41c790865ae48377fdc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343eceee1dedfa555c7b8e1836cbc86efaccb5df834a91caaaea9f435a5543c5 +size 89891 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9659191b1ebd893f969a4a9829a249daea8b1be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3659bb0406fb3a271805acb76e8079d2257d45d98d7bd43830b3864bb060236 +size 1004423 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d9b49d012f85fa73d6266a517be55c8ef0731c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59ada98d755ee927f3cac3bf88e734745bbf7447f9725761cc6f5e54bc35fb8 +size 133015 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff166d3d343f1b3a060f81dc3402de0fb8b612ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e99e9b34dcc5f2d3ad179b7b8cddb2e0555a2b5d639e20bd2a459bac49aaa0b +size 183110 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe690e95d92ac9e1782af5caff6ff504a7ac6387 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a975d562f47c1fd5a59bef80918145f460960f949fe8d7f46bb7b2f30d8df6b8 +size 27527 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdf4917d68eb157ded6012c9acf721b728e66e13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f170fbcc4bf68901a0d090d62b9bef501639eab17383e4e3e5766722ba93a9 +size 120427 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbfaf97273552e1591e1e2a811276c74e65f2c5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e259ca8c8a71e70d2330460b9ce97f8e25ba4f241f65ba15183c140d78216f91 +size 58010 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec2b6d4876989899379d609a77fc167576b28084 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38650ac2dd5bc96b4195855b4a8b4229d573c4c4b9352606fac067fa946bfa3c +size 25434 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98b12f0cb9fd6ac209b9d4f9fc8f44996e000869 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b58ba591ac5cd15ecfcb0c6f922a4f2ce9e5e1f6bbe3e63758ff15da3d5ee0 +size 39495 diff --git a/eval-results/mmlu/0/ckpt_249/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_249/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccac0f60098d70f19512dfcd738be182252cbd69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676fc1bac3a9670b4685b5cec66f1009d1775ca59eabd6d273d8cb2248a1fb54 +size 32960 diff --git a/eval-results/mmlu/0/ckpt_249/results.json.tar.gz b/eval-results/mmlu/0/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d04daac5b54cb02c9832f6fca26c546ce347188e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ebaf8519773f0beb5fa189974eb7c6b20ce758a802af784e6ca506e09cdf10 +size 7596 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df7f18eb67adfd868e132b4f6e667d13619dd905 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb138d97e0bc266eeee7ddff83dd21fa4fbe0857a233c3254f4271c0bbc9a7d +size 17068 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23b434b10942b923b947d9cd93e674711b93ee3d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3812bfd4a2a6a7a531619720f3c36f7d7305e91307a8eb33a345a5b4c6ddf754 +size 29820 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..646d8d6e821dc306345d77d214137d8c018506ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37225f057fce69f2586c9bc4bac21a67f7ce53449c8bd8d2a97f5ebf1a3f5fa3 +size 39815 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f31eff394c7a529861a93d0c157269ddef864b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76265525ad965c583bb2d117ac9e9bef25647d9013af4f0879654839070eb491 +size 26765 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb3ef1718018e29195870a19241ca453fa1ac976 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c2078d241b2fe8d7bb46bb4ccb4a992d102c5f86787d8216dbf58a3c2fdb195 +size 61253 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f21471ce7cfb1a4f0e144def55e2a673ca97a09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:737b60a9507b274a2a617b4f89f35b007d20ab355df95aed2feb8fbd22b4eeed +size 40410 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7870572497214a671aef23c941943e1e35decbed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727e68c99cd670a87bbbce2a94cb8811d901f03f2deca441770ad2bfa9cd4eb5 +size 23817 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f966e561062a9b644ec67024fe8c6ce6add4e002 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47942ae2e15643de12322c07876f41594a5910a39518cb63e62c13c9fb2bf33 +size 31107 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cead6af555506e1ae16edbb031747f8db4b5eb2f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fd12fc700fde5e662e56f49145db2c62de8c3934c01621d07322d7b92f39380 +size 22952 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aee3ee2e11daf25a764915d72d9b8ff5d35f997a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0055cdaf0dc18f372968ce1cf4a83a9d7307bdb7b5ee1de5f50e34331b7395af +size 60903 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3abbb541d5fc6a48238d3db14f5863c54b9faeef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9c757437e67813ab8d9b99378d645eefddb3d8d06109c17aaf1e08f7598726 +size 25729 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da8d6189bdc7e7bde2e00e5592e45d292dcb5576 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727bb313754ae8f92f9c33f1b322400e4aa58e960da55abd6abe4e5b1a5c8b96 +size 25773 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a1cdab59a7b32ba7f8eeac8fc625b3e8fc963d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f888706424701515adf65f3dbd4251290de798225a8d7d5f7bab28114adb99e0 +size 46506 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee275fab27d005caa57c6b06cf5ad55f279a8009 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a275efeffcad3702e3e9eebe4de1cebb949872ab9f8f2c275e13c3d526838395 +size 31500 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efeb05f300101a733a34dbeaf08965f8e65b802c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aefc1084136692a4502385b60368a6cd11a50b8da90cb1b2a54a99a1a928e01e +size 28779 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b52ff27573cbfe4971f3b841d287dbf91ea7fc31 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4051acffe90cbaf7ec04061946016c44745251e23cdea732f0817400c82433 +size 74671 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..143af08496c287aa0a4eaa2264ea62e9cbff059b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a121f47eef34f8f04412084122ec4a46f247dc12905849e7c66c624b5be4f4d8 +size 30105 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b07f26fcf0aac86a33e7f39d2ce5c05d621398e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067f5071ae06a8adbd4fa3e402465e3d0058f7f9bc0d6079f636f383e7c7c261 +size 19113 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fdacb8b431a41f5cac9459ac157cbc08a3ccea7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf85f542a37812261b31b67a88a0d46bcb48120a32df4c931f44da7bb03c586 +size 87988 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5afa35a14e8bba3fe9b570dc1fe5039a192936d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b7a09102890c77115d14db3f8395e4f272b96e862235524325605f10028e6e +size 49998 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55d76db589e08f480ce2b8f79bdf0506856ca689 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801eaf9e4ac5fd50c03cf7ff18eddc737a7f3873b56a821acdc749c69e141d71 +size 31654 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b0b7d0b5e9f2a986443dacb733731601b506e4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e50a7e75731dd499ace125fa2e88aadb15cf34bef35b9fa57e4deda92d7787e +size 145495 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e7d2ef0bb02e01a318368b61ce90d6ac06f7377 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bff590fa19569edd76f7bf2b1ee4c1d405f8cc92c1fa65f8ec21dd277663bd +size 44583 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b764a634f53e740f0f759fcef9b30ea5446de88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23740fa5d7fb58bbdff4203c50820efa64affe29845351d2c5369c442995c3b2 +size 54414 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a07086e6d150b34624862ca59f397faba0f5d37 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5216bbec00c83573d6e8894616fe3b3cf1d709f93bc7064335ef47bbbc04f3 +size 92816 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f7bc19250d863bf4bc37532004383d85600f2cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a7c15e8012d78e2dc1bb3a39ab149429793ecdc602a28b231c4b7048510e6b +size 57140 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9dd458ed7ae8176af5dce31f7d3e642802ff98fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b79dce5e061827e24a59541b06e3baed2b1d033c0bc34498bddca25703d6ddf +size 58468 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e52d7fa38897ded4ab77762b7fb3ce3858fba7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5727eeef0d49dfb1a5492fc32226ec112a67bdee89e642c027d7e8f02a4e24a +size 42868 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bdbcaccb328dda797620b449d0b6f6155919da7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d4b84ca1ce7024f43013f7b9a76b1a9fa9d4b9f86200004bcdfbda3c5c097a +size 143785 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5cf531e8275de453f705acc5206c89af64bda73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d1b811cbeca09fbbc586c952795ff966ebcc6f3ec26e11828fb18f0fc43241 +size 71208 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb3205e70bbada7b25fbd5a2192de147f8d6bda9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed2c6d03223045828bc99df0c727b6326244fb45a0b2878137e34f1fa457ef1a +size 161999 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed97c3e3300a57c2a39f1a37f4db95900ef4712f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ec3697049a67032a35dc6c7a4d84d1a130c5c62bfee4175fb885e6494320078 +size 209617 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e2a81621ca030a3a54f3038ce3ab2ee3ee53e47 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e204fd275cead7a8c6302442424888d35de3e95b148f5b3539c74aa77a0d1955 +size 49919 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc354975003505d57b1b6d77604f8f0a0ed904b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a573d4d594f5b4e65fe74ad35f2b4acc4e0566ecfc23c05a309a3c298e3c8559 +size 31540 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e998967aaf6b9ab9f87db10d778123aabf92bcbf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3edbdd6807caf0c0f2a1cf6e5c9508e957b2c64e005ec19b77c9120bf5aa827 +size 36074 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60e6d5baf3ee53ed7d6190f2ed39cceca1cf978f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9ec605b5b4a3ade821da86d8adbfea56c83a368b8c1a1855ea5d854a0c9618 +size 29480 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5dfdb90ce908dda64b085d40f6f517830bc04ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d50bd9ef507331563fffc0b2707f4dda13631f7f70049acd63e9553d0e95cc +size 40368 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7f2eb438f85f0ce33782c34b1398f87a19b4fff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2373dc4cd7e25240df214fb2854c81fe683cadf187c83d278272a8547ff139a +size 27055 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba0da4f1d3a670104d4bc4fd4a8355703c1b0398 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9d2b88f48189dc515c6a770aed6e405a91482f15fa93f79b663f8893d4a0d4 +size 21690 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0855ad81ec567fc2ced00dc3cccb9e67426df596 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5608e272fc6f6ce076d7fad8fc87d7c87f2584b69d90ae760c22313bd08ca01d +size 57452 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b926c4f20e1ef74914b819ec4505593ad961659 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adeaab918464ecf230540ff6931b0074ad9e19df932bc66d03e49f522e511b7a +size 22250 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45339205c5a443f319d07a4017ce746ce750d801 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc3a23c9a1ae0f34d17d5670c822d6255462338f53c547f490a2ebfadbfb420 +size 169216 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4843deee570117f3e4ddf309facf446e462dfca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5178c25e16d897aa2d62f19d5bc961a8291bed0915f1d6cb921b081e74ed0eb8 +size 90241 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf1719565a71bb432f6a6fd7333f70cac9574813 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e97af9f7b16ef1ed522558f0a191d452f1da267dea916f5144c7b84c42fbcc4 +size 154289 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc544d847be3fa0f802a0de40957c0f5aa9d0eaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d0741e0c5d585417ea5557596fddeeea67eb2b32de2d9da65f50acd8600a69 +size 79462 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01bdc2805ed1ae58344f37b6cf84ef0bcb9854d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5be3ac9f9f7618c622cd2f56ae5fe48622cdcfb49cdb33247b258ab410673aa +size 74335 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44b2103c92196b2bef3bbe00b97bf92a5341b99f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00c7192c1ebb832176bc73308e5f4c6f96cdb8d475684ae4b812bb147306d481 +size 82495 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..231e98d3a139adc868f202538265a634ae29b7a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a990489300f32788aac2af16a1cf87427b8240862a64ec1eceae68fb42ebb5c7 +size 89923 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..575cba82f9ae1860f1a80a8f0116f16d71ef928b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0284063aa321d9e6464a77480726643b0028c6e74a45b5ff6eaa59a9ae3d70d +size 1005302 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48b38831fef394d7c37b91de6817f6de4b61f2a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda31b053147c3c7c6b1e6ba7c6b3b2d5e02fab470118b17900e300bf3eb1224 +size 133044 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67e18a065babc9b9cc8201723b928dba78017313 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51180715e3c2a8deeea5205b2b0e335779421d8fcb404af3c40187fae55e9d86 +size 183086 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b2876cebdee321de1a9960a83e3b666f1764b58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47dc23d13502b5fec104c994d4708fde44a64055729b4a312692d257f2bf4fc3 +size 27517 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ef2dcb407e312d63d05ac9901fa8abb8f77ce0d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34365f6c37527c82ded24f59bf24bbdcd8ac1a1ce0599d82ebf84f905948e6f8 +size 120434 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6431f258666541ddb047d1bdc37416afaacd4552 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb088aee694464038f840646ad2d60a465b7d7f63baa432589aaaca552b76a02 +size 57984 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82e8178ea3291cf755ed7355f0d4c6cb65b23f5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f96c61d27f7c171814ba254d89f02f277fc2799b445dd224543c8d3db425e39 +size 25413 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acd88bb212135013cbd97603d48ce57c0673535e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4167715b4b7d1d418d417dc30cc202ce6ef88ec5fa19a050272da6b0ba24f5c9 +size 39454 diff --git a/eval-results/mmlu/0/ckpt_252/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_252/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7290ad7411aafecab1eabf08326baf904360956 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ca9cd491ba523e01c1c2ab71f3f24eac40c817f1cff9a8afbfd444a8225968 +size 32952 diff --git a/eval-results/mmlu/0/ckpt_252/results.json.tar.gz b/eval-results/mmlu/0/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59dea686eb3c6eb4b897a815e3fb7313b0cb878d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13fe7921b36c062e2f3cbe17acd59367a6288953adf312e3c20a67fa5768ed16 +size 7616 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17e909f09ddeae4ec6968a2021fe7024fed516cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deab3bbef317428a1fa43b3f62f1150351952a2db7ee4c02098e20187330c60b +size 17085 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2d6d0d2ec47abb8b225afc89539ed68a56622ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f497835238a27637b710ceffda5efdcfa234ac733180c4a0be9a28bfc8b423bd +size 29813 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55a3381eee86f646b2e54c1ffcd64e9710135157 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7373f9d51bfdd4d897120ed08160474af3883bda1744b86a44d127b6fcfb5f +size 39821 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e27535f413ba20af2004c68ff52904ef107dc44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786e738b77007994c7cbbb18e50e2f40852415be7b1c230db83583e67942a65f +size 26790 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4842193af5b801d35d6a12426f009c407deb04d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e8619e1ffc7ec1593c6156378bb3718331209b3ea09c8cb9a3128e7ed77ef55 +size 61194 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a39c05bcbfd59d481a83100d30dae0de6ce253c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda47982316e848af72b973599b2aee8789031d9cfd4569a0c4d3b7d6fe4e09a +size 40360 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b79b12257ee562a75c7ac9871ae92627b72a1f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87f746fb30107870257b8eb6e2ba422290ffad4680c1eb0b3086491f2afdf5f +size 23815 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ad67188d351e150f86827e9317ee56cf64130c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c475c950bd1731861dbe2e732768aac65a478a6628d9a2aa9d3eec5c37331519 +size 31131 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13ce87fd02837490f6f7cd724ec16bc4515a65d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e529166fb2f61b8df929643cd18ebae05a9c61efea8c9c6ba5aece6ab625c65 +size 22957 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26f85024e30de8274b1ed325c7917f47c5247fbf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a70f47155136b42a5ee33512c2c2934f227cdc10a8beefd8609c425142822032 +size 60928 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe61bf2f4c1bd0b032fcab4bc9cd5a35a9f96476 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4e600a44cf6dc862f792aefadbc97a7493797f1ece66ad6d3d5eaafeaf6f6d +size 25765 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e222130e58a8078acc498cf07636eb2bc37667f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ee3e3dde37d20f2a8effe5a6020cd28bbb2980d4df6d8cc177eaeb70b0e4cf +size 25817 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19ad1dd3e4776dcb1482323ed6c633eaae21291a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4976240f86c1193e4752f66fbe8f798a1ffc663e62ff037a304a449e9ef969ac +size 46474 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..045012e1cd7370736f2fa566caae903cab007e3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7318b42e5450501b0df87449553a746049550c7762ed5c2c01f978c63c232ab0 +size 31518 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8dd57741598e0af3f81d4b223dbb46a8d79144c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f814a53e4504c0617988185cae5af95cbab2e932c0055e5f43838f7a1075b6fa +size 28790 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d01ce304115990fc72f147c559b8c39a4cb3f08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3980ba2c6390c3d5dba652e0755cdd7dc55a41fbb3bba0a11a0cccbef883c3b +size 74629 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32c77d2a8412effbcfa3a5cd71d649a1897a3ea7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791b5465221b47589031dccc665e5465084937891b00ee09831805c212e82fe5 +size 30134 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9826076b4ef24438c17a6a75144873ef07e77018 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f297f74bcb4bab0bfb80671301113a4e6c122a4448bab4b71ab4f261fe4d71e1 +size 19142 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7df289f34adfb29c7b95b4fe8c0e68cbe8e29993 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54eaf71b0c4651c0c00abfa0996856ead77a6ec9becd4c42e0dc50a2f5ea0eb +size 87945 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb51b951ec56abb550a361ad5ec88e9f614ae52b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02db3513528ecce7ba5cae9872e4aa4f031fb194ff192712df67183661fe6c7c +size 50035 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce4a0b809fdca233094b44a9ed760150cae61d49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ba67d4f97adb483731819053eb3098d6f2bb77d8b38b01db45a628f105ac0c +size 31610 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8338b5303b3c5f9c31e21b80fea779fcbdbd52ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b09a9b07b0571ad29633f7d841154ae872907f50285061383402f1c06ca38d6a +size 145447 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae9b5bc4625bf6da819b012bd21df54a095b1c40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:680636d74e73906db67db4021f0a15eabd366c3b3bf1ac6766e89572b18ca035 +size 44581 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ada74eb596be170c013d70b9234cf4011258a915 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb49ad9ac60b4fb678c39634d876ff9277f847b46c574f912686945563c8bde +size 54343 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd1f5c8738fdcd89806fb0c9e96a5fe69e57062e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3a71b0050036aeffa34d2d6f645152a8b07faecc70b642a5978dd17a7b9ff1 +size 92767 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1397f43a7d7f9873c5b0eb7d128e5fb0667681be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b56336ea5121b97cb49b25b2fecfe4995c7499f825db68b4fd8ced8457803d00 +size 57139 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..183ddc3f30a198bd2a757a99031590ce9ad2b144 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30b3c33b22bfdb33ec920beeb4f8eeda5fce97c94b7603622085caf30cfc9f77 +size 58476 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b4113b1b2830dd1cb85655f74aa4f03b2a2d371 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c810eefad054115660fdb63dc9a828838adabbce5f357aee73069d4525449a9a +size 42862 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..469d910e7a8ddcdef51b644886d2cb9fee96e3a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae672806ab18ecfa0ddb5b95c55220a1074c97755505503ca0f3ccb9cb5da4a +size 143752 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c6a02fb22c4f08232371bb7dcdb6a582286fb20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512a0fc86c22d2df693b11fe7785f88947cfd55555a0c53c19c19a806f043997 +size 71186 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..beaa49d4b6d513c580000d1885f94341a4e6250f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e2932377a017a3da9b94dff7dcad655ec64eb9bb7417b1d7471811d72a2897 +size 162011 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b90eaba6d269f9eac771ae277a18ba2bb0e1459 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20a02f74dbace33b4093f2be35ea9a2ca5d5025c74e7a0969d1fa0cae0689d1f +size 209506 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10fb8b04404fa725a2ccf81f03a89f3e0e8e116b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aad6dacfaf154ba2a3ae7253ab8fe6ce8311e2a66fa3759a0d3c555d44741f2 +size 49915 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55b7cc535330e5d91a0c1fd4255be0cdf3b1cbd6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:402ba9a55f555cd1c3c576beac640b6ec6a9397927abdd224cad72d53c76b4b4 +size 31521 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae8e0f284ba7702c907715b096a75e8c8ded37b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:865d3bd425d7c712abe47c6f5f5a552b91b303f702db043dba01817f603be966 +size 36095 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b58d62b547ddcd0bbec58186b6689893b3f35393 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51bdb519f732e2ed5c65e2507d60c35cc366be05709381e548a9d9b6823c225a +size 29488 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1bdc2eb6fb53a933b94f81df6529bcdd1239b32 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d022168a64ef602b6653958a6249ff8b2e4a1e36b1a5bf3fbf1419a823685427 +size 40348 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3abc903a7e695ece693f1b60395d0d3ae5bcbb6b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ebe7a9a69c56fc8f03a11936e2233e7410f86a70ddd5c2d8f6fb58206243c9d +size 26974 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c58fc0317342047c1c197a2bba5836e5e013f68 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:006a704fdf47bd7b19c7e72f9a20fd00881e7d18a6919c7073e1dcf91dd7d3cd +size 21694 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d20a9dafa8cebe48d78c0faa951fad6887796d4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676e852938b172fa18de90efad354527883674fa73c1cf223bd53470abf41cb5 +size 57421 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..891a522f4a3b08572b8ebaf58d95497e6925f967 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:862d241a7e04dbd9559f473b6d6cb142d398ce989abf3108141d5d63d33c6235 +size 22256 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d6228305eaa693cc5a48793742d9bee495b6be5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d84ae893a9d5011de86f178946d973b036d8b0520ca7ff320a01601ad3bf2d5 +size 169184 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca42c59bad759949272a7742b93a8fa2f496822a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c6bbc3fb674589a2aea34d112d8f46844e86541d3bc41b45a28f7ec9c44446 +size 90233 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..810495a4a973256cb9e8cb99e04392b14154aa34 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be0fa15763edbc3a51e05df5e367b31dc5597c19d16ff18e8af78f25ced89049 +size 154720 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d4a5ec16c2008309423728c6d92ff9ea6c0b60b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d247c7f679e725943855db422f603bc116067fa5401b133732122b4f626bf3f +size 79444 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51cdb82a53c33d3945481a98fb74068ec25c04f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cdc1b14b03fb4be6bddae4e67f5578f705a687aad0e155447aff811c704f2da +size 74347 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38aaa408cc4504195cdb1b5411eae785e3388973 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2da65832d5b7f5adc84b3096df02a17c8e2941158e3dd96324cab00c41ec2f98 +size 82561 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81c3c62ae1d751343e58edac79b83e7aa296785f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c30b77dbed1b1e2da6e691906b247183c5a34426346710449e1af804fa341b7 +size 89959 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1fd9aac271a1f1c0c5b4ff9109a96eb6b723abb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a76dd65c77c25c9b301e1094f4b80e721645504ceb59d4ccccf89210973f868 +size 1004948 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e77a78171ed0b2545895413e8b29b5ca6288791 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8267258ce59856b51f2559619e2cec696baf442db78dc33bc36ae37c9f3732c0 +size 133127 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6596b8eee59f4838ea64d7cbf6fa98fcefdf6bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d97e769d527295e2984955b3122ca6118f57e85d1768df72f7b28e12d58352d +size 182999 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..426c7776ce4aed975159e20dbe9af40381de31cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8953636db3f27524f8044f339d3d1c69a903d7b583aab43a45fbeddf1fbf9084 +size 27534 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a37aca79259b5b981c8f6427f143e695d2c5bb2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:144c15905bd2e19843ed9dbc1d8a9d850c0e3792c1af69dedee76f574f73ae16 +size 120465 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1078e97254246ebfb1544a4103ad50ec28c408fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0edf68a3fb02c60710e2b0667e4c94b32d2ff66232ee52ef244f9b67700ad9 +size 57946 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccc7062772525b9372ea643937b8914ed3cbcf6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64792592862e1f7ddacfbf89b295ab490ceb612420fb141b0a5d359948fb3424 +size 25429 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1567962a946e3765f0c643ab25920a79fa62abd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9c46af005394cf6969346d9792e861e7d64a1a713db3b8d3a8ffaf94a15b8bf +size 39447 diff --git a/eval-results/mmlu/0/ckpt_255/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_255/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..491796da4a284530ff8b5107772b0e7f9e5cbd37 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa7100d159178ffe34d05d30fb14fba7de0652a0b2b0ff3b26e2d9b0c064a51 +size 32965 diff --git a/eval-results/mmlu/0/ckpt_255/results.json.tar.gz b/eval-results/mmlu/0/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc11d3f811914eb6bd0ffab2b60fbeaaf24aaffd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e6d4af40370afe22e45a66337244ad0fbe0141a724c806d7c9c1358d5b22ba +size 7618 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..550fa8a0c2ce8f2e8b6be00b15f6de6db2216346 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038dfccef084af4e117cc20938656c8991d1f0aad080c9d7cf5fd4e6cc9b3132 +size 17048 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd4bbd2b650aea777c4d5da36405cf7c93703502 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f2c30f7d721345c42a19350d98a58ca687a1577ae84a202c9c9bf62d9e11657 +size 29824 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a852cf50a43c6ed4308299e55ba6c9ca44333ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecee80c9616b0822544d34549c8fbf4bf975f41f188b231af1f001a374a06de2 +size 39839 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8532e23068c964c0688ecafd76be9f36b3d28d9d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0c2b3eb3c6632575cada8b1d1fe0e85cb22e60e5fc41bb2da27a1b2fcfc837 +size 26800 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec6a683ee4f2bb34c25627fc3b1a364624ea40ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef6dc81e8c18f459d329870fc71a6939696ea8a083fe9ac5305798e67bd866a +size 61283 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd8c6bbcd157f0a381fa322ce11f03c6ef6dc12b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a94955389f8da3809b4df15a85f76884c269e55ced10863a86bb10a8c9106737 +size 40399 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0736f6ff90c584dafbe3b05e8766edb7c3302992 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6781fdf76065d218d851ab265e3529d321845cb377d1169410f65e5b80ddb671 +size 23837 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba4ac0068843480e451b4b9c26e0a9bcb3c5b0f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371ed564b8276a54fcc3df35b88924f9b746c8a74850744505e375dd46bd5cfe +size 31116 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e50c9c2e66ccd690d550f694a28d9a34843033da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c00f5b63b6812545ddff5de55ba20c3b61bd5594882e43c4163e9413b59c0c47 +size 22892 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ed299dcfdf7c8e7e57ea9525c36d523470a8073 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b94af4d7d7c8164b741014765cbe177d3a365b67b1043356093dbb10c66d9b +size 60972 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..622672141bd88956644ea45de381b584c13ebe7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157fc58444e7b44139c6dd42c273ebbfe62686b0f4bb3c539f311ec5c323bb1b +size 25726 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3a9c2a6cf24936dd628671e704525557f5ac73e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3391c01b5100c248b77921e19afb1fcc54aa85b17f8449cb98d5c337f723e355 +size 25810 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0fa9853a85bb79ea4fe9a064461b1b393580151 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea90cfc143a457193ee6bb4c7695d2332721bc5c401c5feea86e80d391c3e31 +size 46514 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..646f20b417e46f6fbad57260712967ebb0d846f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d190280ce54c09d0ea6909803420f0effe804aa225cfab9f8b4a42c23463602 +size 31518 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee2c38889b7f81889485085e4f7dd97a5a8a1d92 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a2d8d1733b82bc9f654a4137423bfb38264953da2870940d4d385c9810fee0c +size 28795 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ccaf74a84679a1e9d62a9951c3a72ba0055fe56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5a59c7bcad21c19fdaee13da0c2716c1cbb3c1542db1e9b3c71ece6f7b0502 +size 74723 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75d6735d8c03a2e0656904a2bd17313041500cc5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa3e05de10fa7dae7e8f056930d8aeb878022b29f5d99dbb11f10553496054a +size 30190 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b57d510649fca2d8fdd9d71cf3a83b0389d881e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47bd0e5d9d9959b30d91550c184553a1c129ccbb77a902de71254e64d063a9e +size 19084 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4d4472922b99b2a897a7e1721941330c0eb530a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a123bfab238f9b74a5af70b60a18b5865f1def5055c9221eb0616d432330e62 +size 87961 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05fd021c28468309249fa00bbe6a4096d87c2f7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c13178637b6cef9a79728cb4a7db30be275dfc3ebeeb8e744a0395c3b134693 +size 50008 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..971a00af3a26a3980f9b96f51483b19317415fb2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b5690d62245fd0880d3797352c384f3e37339921cbf11477be09f7d09d2ffc +size 31620 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9650fe00e7d6c1d445c974c428235f2b612b289 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f9db3f1b0d190e6efd131792d4d92fb90cfb6eb23859f79f65327140735fa2 +size 145444 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..556375ee57eceea0aa558b623002b31ced5eb0eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ce4965e8cca3042d3ab58875525337c1ade19ab7e3a002daae594b94c23c9b +size 44549 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca168e0dcc35a23f87e75f898f7df8ee705f5579 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b1b91bd2395e2f72ed4f7289e365e5018116d85375080118bb7954830cb24d +size 54333 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82849e10dc773feed01c14e7d811c75e5093e1c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f8c9e34b87eb909bd64f73e5dcf8be032dd254502ad81bc2632cfd583a2c6d +size 92861 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52cebf8c11f24b77a7f3342a87a9864ff9f98ade --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df41ae4b05dcc55ea2c435673bf8aced44bc758cc1caee2fef6f7f1806daf64a +size 57061 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..971ef2703ffdf2f0963ecaa91e1532fc9ae6281c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d45046c00e436ee613d632488c098f665f1f4901bfe29ab84fa81c15b01bf066 +size 58442 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4abb7e05ee7225ae7c3260f6aff5a9555ba23cfa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95bc1b0ddee88da55f0c9684031c20ee4dcc49eb6c05ef41d876a0ea8b914c98 +size 42876 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e334fb5eeaf0dc260af9b84a20abc3a356c24e20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae65f199cd455fe76700b743367e10d910d268eb007d9a3ed1700fa8fbd1f66 +size 143861 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a586c32c2a3df40ad55d0bc703f74c4f4965a4d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5811328d80071e8ac5fd1bba52254777d07849b7ecfa8e5364c7fd88f6321c3 +size 71209 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1c3d11b848e91a664ecbe0323b9474f2481526f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f8d08888440c2a8716d187901cece8e77038d154f59abdba279d269d38d8e49 +size 161982 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..208b90503038047be5f408a15b7ebd8ffdcbf716 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3e32d80e12634aee311a0eaed385782f3956ca30bfd78b3632972a32e483a2 +size 209535 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6bad02614132c030dd8862dbe52702a1a95f4811 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091e4a4efd6acabdb2893bd7a58fc4b458f048447484ff175128e06b4c659365 +size 49944 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af1f15ed0883b45dd075e9950ca6fd984bb47222 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dfc0629ed3c628a7bcbbfce897dfe1565de48b24d8939b9baf051aa12d2746b +size 31540 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..897fd854024b7c9f8dd501b8161e69b9dab0ebb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ac5974b8effb24d8a4c3acc83735a25821820820a0d7ed56d5561575ff91e3 +size 36063 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3674c9d5dfff8cdc68fdcd98b431eaa6909e0d9b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba24095870518312fda4f214148471429a155f60b46be801209cc0a8e903d000 +size 29493 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e57cac1292c44242b074d631a434e92152253c25 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce5e016d7b91e5b644859f9a6eb8ed36bff8c52f373ab38dbcb14eff6bb3789 +size 40316 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2ead3757d6c274d3097b9b64c5daf6ae43d8d3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa0ccc7b45d838ca1a42882c3a60e39ea511c81b40c6c944167bbd1ea9228743 +size 26999 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3545242d28beca352f123ef2a00ee2470e3f6c9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f0549515c618fd74908becdf2d79376110dfa180df37abbb0361d7f2912bede +size 21711 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..904402e8a68da23db1fbd2722234c7c99c271659 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0020636574bf3bbef12f78d984f9012a69c41a3bfe9601036b01415e47e533f +size 57377 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8935d1edf49965ac37272378a895734db5182f27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1749420ea0b552e2239287b08e5d762fe23d439ab2bff1238ba7e4b3778825dd +size 22286 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d76c246a2b4d244df94cec6c1e876af7610cc580 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30252d54e48bcd43ff50258883cc67857d681387b34ec18422bcf1762c9d69e +size 169179 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eaf0e73d3efadb2766fa10622190fdb2648972d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80441d789a1af59848880261fa59551e0ec32df61f65b22c422f4d0f38fe45a3 +size 90234 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20ebb77700d69e224519c3bab0f9811fb2ea9f12 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37fa9644e68e0234c4bec289c353bdd6848bf20e2c93d5bbc4314b9f19cd4d3a +size 153657 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..050e9fe8b42ba4a32547fbee8d5bb12a55496521 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a587895c896091af4dd9f7dd9d74d6c6ebab04915a20a4da6fc9a22982dcf86f +size 79441 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92de92f4dd6416e72c52f63ad974d1834fd94ecf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:022542910d0800c51b8f9d19ace7dc162123b6c019c8c13afc756bb9ed7e7f7f +size 74322 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5de3e58b7fe47e54874a2a816e086186290d4951 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c082fa3c142285d2dcad2eb85310b92e07479937363da26924f7fcaf6da89814 +size 82516 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3f7f06f427fa2864a9206d216f4329498f4cea6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3400fa5914446a7b0178515a0431c74fd5815c998d69619d236ef25857ffdfa6 +size 89887 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00543930ee9c446cd2393aa0fa6d2d4f0439d3fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42da5a32f25e1816c9d5aac07313406f5fbcb7cedb0b2c63ad9dfba52e82646a +size 1005036 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69d231a834eee6afa86a5059456f31e56669ef03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf58e7099ff3605c8dc42cffeba325cf6eb4e3cffa217b931af0483dbc7b08aa +size 132987 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7980531c0900d5235c9f93facdb2610d3789d86a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e10b0f5f60b11dc73b0fc30857a82c554f4c6f7fcac79dc3e4c6303a2a03a7 +size 183097 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9046a61d9061f2b9a426a26ecde55e9fbb69d70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be806ef7eebd1cdcf1d5fcccf91a4d768040ada6e2eb2f544a49921a290bb6d +size 27523 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fc7d68d1adc95d951d30417388622b6412e31be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:119374e4119119f5eb346a3a3993c2a298163c32939204f875a9e23028932272 +size 120456 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef6f16ab76e72adae98dd19615ba6ec0106ce915 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6214773440852a33dd1948792267641ae5aeb334118982ca181ead54dabf31b5 +size 58011 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b3fa289facec7b64f31beb8717ce5d83cfae11e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8cbd8c4d10f9b102f5b5a5f87df8c6ab930bdc02ccec37c188569388744aad2 +size 25436 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e07c643696374cba2d6806a9f18b69be426b99dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b77d9e8815fce8868f0fea2c60755a9a9c95a373e38e31e8964d75d1a219906b +size 39444 diff --git a/eval-results/mmlu/0/ckpt_258/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_258/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..320cbe85a8c9f395b81c34007275fb8606a758d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695f6d15714bd75923093e64ecc476a0efcfb40c93e0c14218c4204ad90dfbe1 +size 32917 diff --git a/eval-results/mmlu/0/ckpt_258/results.json.tar.gz b/eval-results/mmlu/0/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..208259b2419331a379814a823a80dd3242a803f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f505c1c3b49d3050249a1d6f006bc1ea78d7a355e09f3e582e59883ec1a71605 +size 7635 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34b9a8623a230d2b172e1bb2ee17f5df28072af2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc7933ccc8ff2009e05faad4f9a08927fac0df764c18a9995b13a890efcf615 +size 17022 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..289cf891f4e3514b3335007908cd4d913f5d255d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f21379c9229b8e96bd9d6fa81bb79886440514bd98b23de810e9cfc660c2ab +size 29823 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29237082a002b1ddf590ed34aff5132e64dac5e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a47eaa558f2fcd081d5ec3a1b33c6696f69f7bbd8d4882f3bc4657b523dcbc +size 39815 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48e222b026ac0933ddfd560eb91772d55daae15b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:376776d0a94cb387c5a1e930719f4134672b2b3211f238413eadb417e6ea8b30 +size 26790 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16820c98a2ff25ddf9b047a955720753dc78716a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d179cf744e1bb620b2eba5acb5754e36096beae2dfb621c511aed055d36871 +size 61263 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d4604daa8de6706bd75af55a647edc2a52c8b8c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2ad15d1aa42a7b344fd323e0b0439746dded15b3473b9537d37a29276079963 +size 40388 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a99679275e5b7b6cd244a8240a50235c2079b656 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9738ccc9c6dc0874221d7e647f5f7b9c51e83dff1f0c4b067eadde3c619e36e3 +size 23803 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b915a53fa8cdd816225174e88debbd323eb2317 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5530b9974f78837eb14ea3effa3a08b0b251bac33f2c8a8d9308db5d57c136bc +size 31121 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78e21f10fffbdb836fe4c514b68cbdcc7818fe80 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668b407b6efcf1880bb4df1da4e54ab82a11b3665ec992c8751b8e19f4ac434f +size 22923 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c19fdb233c0733e8349797829516cab21dbcb3d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77954d4d81f203f2b8f257cd4dbd1a93ca46c0c656c19832a1bc008ec815ef7a +size 60909 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d83d018d36d250bcf0e576a2389daa5616338b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f155dde033d60d4e0bb0ab5a3709c0de0851f12056da10e28b0981691a5b0bd +size 25755 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e78f8b9144e4d86963900aa2ad3cb1b6ebcce91c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57844ec57b740a18a380f2d15d117c8929dd2127408be971552eee135d5beb5c +size 25747 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1402d07316c3cc30f4b2fde28ed8ff14ddf36003 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ca43cdcb998577f2d6c0e55a06626986a8599402327fd95168004501edf25b +size 46469 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99977e34cd405fa2918338cf58dfada71897c458 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6aa0aa87666291282e23e9ecaa6b621c165e03567eb3c43558f5d63df1c7ded +size 31456 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa3a545d243bdfe2bcb538a39d7c3cd1bacd8001 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:024724c0abf972818c2728158a25fb3c39af82d3276e8b968d6957b2f43f91fd +size 28807 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ec9c09979a068f3fe0a15e49252454878b99bf8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7dd2e4b2e6723c7b9fa5538a7fa4bf083ead720f8f0d99db867fc47e8fb671 +size 74609 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..661920fc27fd35a163eaa2b9bd159deeb2e6c1e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:792ce8d51f96de3d5ffbdb5e8e434238c9b645d6112f0ebbb1dbf8f49f5fb4ed +size 30119 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d90a4b5e88a4a5bccd5b56d5c2c6b72a2c2d1cff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610d979d64d6e268a4787d126a11b641c03f7a3400359597ece517137c27d39d +size 19137 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10d3756166f3bcf1262e30f3ac00d4ac6efd2dfa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe510c967c32d4f9b02dff95cefc5e1fe0deebbb2a7f0c3cc4841755faf34e1 +size 87976 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0ff3ddf85aed27495f607d5ffffe450a51a1757 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9186b8ab1f817bbc71ac7d8d1307323531df678310068fda57cb9d68be0c035f +size 49988 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5eef05e20bd9119feba3344a436551176d915325 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ddd84baef4f2e16cd09be2ab9627e1486dc07624e1f6d42d2d9e6254826dc05 +size 31601 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..590e01cc96944565f91bfb8b311e6c3cc69a0046 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7ce2b4a0c2f5dc66bb0ec54f864e4e1e9bd9792786235cf8d07304dad23452 +size 145454 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ef17eb24dd8ee1a5f752e45cf5917c6da692b65 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40130e7f8ad7fd9845cc083ab4928f9310edde8a02405079ce6c401e7cc69e08 +size 44566 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..450f7d7540063fd98581e15a6ae7e3b81d325ecf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c954bc0789ad4b8efbdb15752094be6a7a7ec4a3e815ad9d7a07d2ac8237826e +size 54407 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a51a51bea0fde974d4f829a6ced9d39f00afb39 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8824d828dcc40bd5559437f03c54c7846915d4b1c281b10eccdfd86c83054b1c +size 92777 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35adb2f86b21babca6732a7944e565b45a8e1283 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58e788d258c90dbb53624e7e3de30057ecc95c86ab66dc000e479a354e8da1f9 +size 57026 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..985dda437b584d9e2f278e3b3093ca1ff7b072a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2128733ddf4dd21ef9b6e1c100ae83cc07c139734e57eb706a1e6beb84c330eb +size 58472 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..999f97ee972d758b0c27dc97508fadac1c46a9a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d604fe8f68b12e3b13ddea32f99894083ea03f42b7979e2ff9960b014a0e0ab0 +size 42869 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c720c381215657028e7b0eb92ed8de6be507fa2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8543febf0bdfb12ce5097da03023f7a18f26db501744d2ec90626ceadb3e88ba +size 143857 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77a4b2fdb1ecf54c290979fa3ee336975125eb07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0928541d07470cfb83aa6b0bdad0987bfdb0342b3b49e34f70aef2f860351a10 +size 71175 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7491189d2ebb092e627455d9edf713cb5a3ef0df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8454cc5ac52e01fd11c5c5c6df813d38199566c758f2574f7285c2ab39785e97 +size 162044 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a677b7fbb512c2b615433ef1eb1ac10a8460548 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4886e1625dbabda41cbd9ed7bef044aab5e0806dd424f94b023759e812a3a02 +size 209554 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4210e40b5a03dadb4fd005c2324db05cf161c43c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01bb44820c3c23712df6f4ea4e7be4271238cb922bc147d9452ac8eba3a6299 +size 49890 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ed30dd795ef7a881883211192f129fa800e471f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed6c39461b8eecde3994e95c7d1122e547fd8bbcd94ff2597e8bd032ebf3d8f1 +size 31559 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2ef39cd3471830217fea44e75b79c33d1ec4a87 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1862d230c731fe2dbe0079aa1d827a7781cf6a1dd58c9951b9cd69505993edb4 +size 36025 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e9a8e9ac3d4e709edbe6d264ab486e6d40c971c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86dd6b059aed4f1fd9b812f997eec80f9ceca405ec2d9f9aaa65bb41ede1fb23 +size 29465 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49aaf5c2fd0a2881b64485bc90d4471774f85512 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcf406d17f74f5be735136a29f6ce4ae63ead037b16f4fe7f15ba1998adfab11 +size 40317 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e812cff8584eb941384ac0ba69d754aa941269eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8c92288ff0bc7e53efe1c8e95e9311ff8c087277b0f0d08a723430db1a9f6a4 +size 27016 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96b76fd18e103f168f4e0f87da7d023697c09de9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66ba6b6877f71bf1ea7a0e884f654fd657e6efa1c02df6d39850ebe7dbd32105 +size 21705 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca4fa6bb39eb83a53468081d4bfad7ac86cf85df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0ab3357549aa1544772597b6e6864f4b3256b09a62363ba8c03743d36a2c8d +size 57364 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f99219b87b433a9edfc63858cf31a23acf11132c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2d781703e192572983d13eeabb951b98ceccc1e1481248ad8df656d22ba990 +size 22257 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bc0784857e60d2116e9ac1264524132f4ca15e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c99b9444ad68449a52b83a9a4fd47b39fbfbdf05d6c84106a684cdbaa6555de9 +size 169115 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afb08e9700dc26e67361848105adba59e9c3e214 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15c4533ecc25e4af8f98c6b9fa357d965dc820e628fb6bca0af621b9c3b5512 +size 90181 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f878d65f2d9a0de17308c88f27dd3df28c15bfa0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837557983d7a8a807ce22542f408680c126bbdaa1bc03356832e3b905480747e +size 153833 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c654aa137e374d230fa331c2e8edfb5ae73cfa3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2417ee16f2813bda3d72b6fc1520fc78f951086b53a718b64a046cd2f7d1335 +size 79440 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a41c108ac6042e391d5cf771a9568504b0f2455a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c70cfa42106cc49a907d428db8126b2f84b6b292f5a8b04a8bfa6a2daccc345 +size 74332 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b838fb3e21d36321109c25071c6f0fbf7f6068e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb6d98cf1ffa954bb2bd2a8972833d5dff3cc91e26dbf651f83945ddbc944fc +size 82534 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44b13e23163bc9dd2f1d017342cac7d3282e55ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db4291610ad14d19fd732a150abb93350a202a38fddaa56ea8cd67f91c7174e7 +size 89898 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f03f255f7ed62aca670be240457c2a82ebce7e22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb0aeaef448dcd7b0a96e6322350b2b5419355021381f66fdf56b58a134fdc1 +size 1004873 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b59fd4fe9283d381429760d166998d240912eec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5197157436b2a8d4bb1f607f17067e752cf24d65b224ef7f180965c4aa82cce +size 133090 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab4af0bc18c8f275afedc22de3ddb70522bf7c81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde470d1b0c20e34a3b02003fb6f044e41f751c1524c9b0570ec8ce5c85e4830 +size 183051 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b97cad8cd9caf41aa26beef030126b4eb305c51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052adf2dc7a67f084466aed65c363e279748b4e111df42d099d2f490c2b29366 +size 27521 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a990fc19a9e070ba8ae9a8d4f4d63287fe72afde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a507bc993c05964c2ab5456142292c636d0e2152d9c46aeab822d74d0760a6c +size 120390 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25424847cafbc4f53d4588f66c685a129e0dafd6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4278850bcf158039de43cc4243618c81db3a4834ce1e880823c1c79291419f5e +size 58005 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1216c51f0c5c425a5b58e219d9425bb31688b0de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bdbe392bef98f1725fee1364d95232909c54fc155b10855a316b8ff16b253f2 +size 25431 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a250f7856d2e2fae4ec6d429e11e50674c1048e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:175af9ced04d96f4154be66a8c8d97d9252757d2166f2897f067095278b72913 +size 39426 diff --git a/eval-results/mmlu/0/ckpt_261/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_261/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c249caedaa85c42dbc5fc13007cbedffe4465cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1705df35a6436104a489e94fff0828d2c4f3d2c3304882b49bfe62d130c11c +size 32931 diff --git a/eval-results/mmlu/0/ckpt_261/results.json.tar.gz b/eval-results/mmlu/0/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83d5e54eb6c4a34e8115e1eb3afabfa9eab14dc1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18bb96e8a15c9e4cdde1a4910bc8b7e84296a3732f40de1d61a1e33fa9a95f91 +size 7638 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..209c0e10817f16deda525599ab2891b9e7c7617f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1779521377b5598fc45214d12b452f3728984693e812d6f2764bafc55b1b2e0 +size 17054 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a7e144810e28cc55404fe36f223f4cc5421a519 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7cd203e304bff5596eda0c04a054d8220f7766108be509c1fd4fdba5223645e +size 29844 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19b977877a8467e6de0fec44d3f1b90fdcf30a82 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f685f44945b6eab2e5ffa07ec688a0240e427044c5e279d0835a9734f22195 +size 39823 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f89dfa39b5b0a164d778c603e70053ecca5c718a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de7d4d2ea43804fb914d7ee199a520ee97098b0a746220a5afd1347c1917efbd +size 26745 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c0f69eb780b29c5a412bd83b6a26e9de7bb291e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e87049e10c9a3dd5a412caa6f492e1251e209f66db6e734f7a9550aaf43fce21 +size 61264 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa97aaa8aa924b944833235ec59bb23bca87a1e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02281e063a7092ae3c49f99ed1dc27292e8a39a47a41144f764b912ebc7ca47f +size 40402 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23a6b09ad7d6e6e059160a8b578708d001a90fa9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f90290a117d68cf9e163c687d950793cf9cbfaf8258a639480704c237dbb09a +size 23840 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7015ff4d39e094ad35b447e9324bef136ba2e6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3cb992564e97c4bfa1f76b8e5539f5c1b8ccd5eb0b657547db1bc4a61220fe +size 31092 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23f05478bb0507a3dc07e38c5f9934253b609b62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f4c14b05db166a529a153d8887286f5c0572f54dfc63689660858c44d08aa1 +size 22919 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b60888271caea16f6fd47c64a57a7d0df79298e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b4c255de2231c98574a1cf268e93bed1bf76712391c45fff697b495ad9bb2af +size 60905 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73cfa2179165131c4001efb330a0d0ab4f39d9eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad518ca31acb1094cecef24f6f3d90f865dc63747836f6bdcde7058ddc5c0a2f +size 25721 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..877ff0dfd66168647992aeb9958963d7c000c79c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d623122f41f50f94ab154c6d327b7b646021a821cd889351b3f13ecbb42444 +size 25771 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c02c5a0988d2b3d39bb6ea26d094786bd00d935 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d7f79398dca46a87c19b7560b3347eb3cdd22c97bd19f104d87e24ab246ec3 +size 46509 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..754a90c84ee230910f868d8dceef32ba9a898708 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5f40bb5a341e1c72ba5f5eb6c9471efa51633c8959a79338d9c8e2f455f3c9 +size 31478 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d8e56c60d78e76038c3535d1cb876dd665a341d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5893a026887d783c1f7f889046948b57c53db35cbcff3cd828cc3f782f3c8e3e +size 28796 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bce714abdb952ef49476b95e45a700dbd74541a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476fa67640b13d44ce0e5db42a7253edee2360978d1e739c67015fec8190e45f +size 74534 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ddabf9969a6bafb52e30bec2ea49d0fc299b687 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8c24c66fb31fc389355e6a911988c06c286d483102764f7c588087eb634b5a +size 30084 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..817a5cb7f2cf3e0a5b839587ee86d6727e0f584e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e74894132168ee89c790795ec8df29846b203e2d2d373755a34aae8ddbd17b5 +size 19112 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1f699b4705c8f4f4c6bd9956cb5ff417d5d6e49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28fececebcb386cfdb6ddb178a498cdfc9f17b3f36ca9f9a292c86ea263644cc +size 87956 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2670678332f31b1bda0d62366d5c95658b97985e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e7ec9882257952876e9382dad4c89f53afd96ce0c592f4866b3e86f32beb5d +size 49979 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1000b0c89d2bce2ed45fba343faed9105a51b081 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f21c0325854928f11956ddbc60f09250fa927757b0e8015dd11c81be5d916a +size 31592 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2528a4e01461ad349e416a52f5796ed2a7958128 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d76f8c513d10a7f0acc1e6c27ddb532d733f9b8988aa57c265dc081506c4e93 +size 145479 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf8f3d151882e915d3e17e15d4e07e29971d9eed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a45ea6178d1ac5ff5a45c28cc0f052080e2dd49a7a9a60eb8948d1cbf4a8b4f +size 44551 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f86d8974c8020ecf987762521e97e583fdf3b8fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c99f72bfc3bdd46ef09d48afdaacb9de97c0e42434421a96c10f2f1ddcae7f6 +size 54435 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3db84da1342ec7e7df4c5332c1ad687736cf7f53 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d742fed6b99d3904371229898e26ec8dcf2544a64cd67fc4bc0d84ab497ab5cc +size 92768 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cdef07e37f159005ad78f76ea7bd61d6ee2bd14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f13a801d72c6a98c5af8eb2200e5158f038285dcbc9524a54e722bb0a6e96ab +size 57007 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2c89520b63c3bf49d01cabb29793cbcbdae9075 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5930102da26451098827d5c6504e28a3cfdf17cb1ed60f7a7607b90159433c2a +size 58445 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3420acc25dfc4450a20251f716bab505642a5448 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2271e04e8a84794a58333471cd50d2b09a8130e82c7e2f2678bdbb9f5f9a342 +size 42875 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0164be3a42c7cd9f3b55ccfecaa247f4f50eccc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc787c17dade3baa9546666556657eeb3e808b77bc598a2f786b22dce73a3b9b +size 143915 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..529ec33d52c1315a26829fcd12317666b1bcc8be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73aa4fbd8e06bc56a3f740a177f2e06e96665b23e5774936a532bc24586ea573 +size 71140 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c902797b5c8acafcf7f28071c682f251dacfb4e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f4223cf54bb0517c2d4ad1f051ad8e0128e486d2dad92f741a4b4477b50300 +size 162019 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d18e55c2f26ff0fc0c11ab08206285dd24a3a49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ca4f855106bfaab592e9834af49905bfe6e05df707982739dcf6a1b9146465 +size 209503 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cef8607f40832bbd38bdc8ae31e64d45e729d33 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2796899320d13220aba4a3ab93ad86df709a63263caffb5c36e5af4038f3742c +size 49972 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..276133bea548ef79fe2c38d93121e2fcbb989109 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b619a5f40b987fefe35292323cf2ca5d97d80c7d5cbd7a37d418f3d9f19570e +size 31551 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77203f2bad519eccd2315dc0ee1fa91efb254bb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e45df39752e7ca468f554789bfe5207e25517c6bac8c02134074b60c89dd8fad +size 36083 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59e0a775ff3d7a26c2da610231092d806ea37f93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac479521b4f806154275fb8887d8b21213362055c379097bba258cb6cfc8c938 +size 29474 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0d1339de6813208cb20f72f7169989e3b67c871 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e37520164a48d2bae64970ce9c4f6f0cb5c1bdf9661029aa2b513be9236fbc29 +size 40303 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41c36cbf81c03c5725f80b700f7490184ac596eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c1c55d3a4d27cfceb1782b2daad4747324185c92c06b632f124dcc71925a550 +size 27012 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b000daf86ab74d3b1353de238894a4c4adf94de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ecd734b02db3e170c6221b96833a4ee4321c6e10fd9287e20f178780eee078 +size 21697 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49d72297d961202a97fd72896333aecd1a5e93d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb64a3b0082869aa72c323137e3be7fc2ea214be2d2edc5cb3a94e1af5c0fd1c +size 57458 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d1e139eba550cc9fb221ccae7b79cbbe6c9e5af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf889af464d3897f487ae475392f07c9cb9482cfc39cd00b33c31076b812606 +size 22277 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68ad2cc51e34bd8362eff255cba57cb588bf3b00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37ed38ee21d2e9f6da4872127f1ba6df3a0a3b3692f4ee00d35321f316af748d +size 169129 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..136073c2fc451f35fb3e7cf905de4ad9c7898a44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50b37855d924f42ccfe069835248a7cda5bacd496a7066dbfa02fd34b9ea88a2 +size 90284 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8fbcf8516c6be57b01a0f5d6c987340df0d04d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eddef3790fd6c876d6578376e9ccb5be0f6d88ef34a071e68bf721044d0dc74f +size 153973 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdcf9c6cc05b2529b2a4c326a32e26a0a8dce184 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78304f3bf57b5589edb7fe59a86ebe5c1fc4efb27a9571b15986dbca7cf1ce93 +size 79409 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9b07a8d9668c89610b9409f043b1cd94a2c3d7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e6cb19963ebb9ce0c211e4e6e0096c2a30db4ce4e0b5217eef6d3c9754fafe +size 74337 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a9ca5b8736d32515e6e97a21cfcaaebea425f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ec493da5561d8f2826c1e5f11f23add53fef89d97e163f7e4d34d1f4f38852 +size 82529 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6191fb9ffcbfa392052312e9df58b321358813d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313958404a99ae59f99f48031be6e923141060fbe0d22440f71440f71f7a96f6 +size 89789 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa00db34d99ce918038c44ac566f931c7d1d4314 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9078747732736a275bc196183855e5413e264c67073eecb2acc1bff6ca4195a9 +size 1004352 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e11d74796c033bd8c10c888bb1bad4c61b0b35e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bffd8eb7bcfa37f6719cae67e724e7fa5a723d16a5ad3a89cb454e4169af2ad +size 133006 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abb4023c90e2d685bd1496bb00cdee130e6581bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1908ba0817f81e695ab349976319bc90b9679560ba44bbcd0cb70fc5848eca2f +size 183065 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f96d72935dce9ab3475cf269dcc3016fc54ea92f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78ac64179c623b52820c77e376750dff98182c2e73da2f8f9520bfd61bf17811 +size 27519 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2050917cc4111a6b4e797e7f24b667949823f43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:537923c953e695857b95a62f1d4b3cd37f80cb32100d3cf209d8ca0989967020 +size 120448 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35e791f64ec4519419493064a6bd8945f8f90e18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3328d55ef4b5a45415cb312f13405ca67ae4e45f594dba199e53c4023f6dd7f1 +size 57997 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d1805013bb3c0ceb82fa0dece24ea732ab65cc8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c477cca1b1132fb0421b1dfe2e2afc3eee3b4fe72271fb815e7d04ecb77cb133 +size 25402 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd0e9befde3c359b9d5b484d9f4571a585f78c94 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6eb81b42d156ad6ca49e93a16221ffda4ff2f14d4bb240fd08c560a1b1e00f +size 39440 diff --git a/eval-results/mmlu/0/ckpt_264/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_264/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..467027ff0aa354177937418a6a96f257ebeee83c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1204d42cca30b0e003360798e61c98c7364ef0f4453395e00bae2a8a1a06a6c9 +size 32951 diff --git a/eval-results/mmlu/0/ckpt_264/results.json.tar.gz b/eval-results/mmlu/0/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d8984ee5115b3d45a6dab12c4c0889c60e6e335 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb13ab80bddc37c636ce53da9fee151bbf7f943c47655256c3c9eae8e6fd4b35 +size 7648 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5204bfa0e383c5cde999d207efd1c82d897b0403 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfbd6dfece8279d5855bb1378a86044339e55ee20e6c4aa84bc0376bb760dcf4 +size 17027 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7c05a3f24332bf0c7d63234f744b1def995f68b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cace246f3942ca1cdeff0d72264e1ec339edc4a33a6f22ff0f3f8f88f9a0f14d +size 29812 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0943b46ce984bcf3ff4adaf72b635d1fd8d50743 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f356ef3ff8f73a71b93001d5439727a0b29b25fa0fad479d6c3de3ae9fbc49 +size 39813 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1e70055a721ef21bb38e4478cffe0c42230dfc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c840bcd78b05c3c93ab391e687d3d0f0a488b9f6453b04186275aabb0f1c5d1f +size 26752 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4201846fdd1fb9cfa105d83715882da87fe84468 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174473f490552fbe7676ca6d6cb850d9c97aa8633acf5aac1abf3c4b0215a3cb +size 61308 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ca8c39ec4215873fbc86c64c6674c25cb83c450 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902e85e80f5f59bf581b759725a07922aaa233e676a51528688d0d5b9eac2c8e +size 40445 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44f12fa95c26bc3027622b2acd046570f3aec323 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef2776ef07081e3a554a375a7b0a291bac25cf81d78abc55b62b072368b9854 +size 23811 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fc4361eb38992699930aeead5172ff82c72dccb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e41f0a0f50df08a6526eec72cc96faba773b1f83efa740683390f6661b2df8fa +size 31106 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7f7877203632508d3f2ba4fa5828b5e160735dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7305b0134daaa964b5441f6c20c072f71328614adad38c8fc2a8249ca3584a19 +size 22886 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26b22a5b2e309d8356ea26d7b75e0916ccec6d35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996136a071a14ffb8dc9725c8b6eaca1110f54eff5d09a63460f85a9c14421e1 +size 60913 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3c05e6be112f3a0d3eec9c00028f9fe8245848a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:110789b4f5dab3c2827e03a124cab27369bbee1f9408f8c5689571a27b0498a0 +size 25743 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31d536aabbe50c7cca63261fb59dfb376eacf1b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a1616d9156cf23efab8d69068a10c249eee0718a3e012b85ef59ceaf01f603 +size 25771 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e240e519913b2020e85500012b77ece6eb6eba6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb9fc804aa3971f6ee4bd55d439a8c191f829d8fc9a9ba7d0d8727037410ce8 +size 46513 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d17e6d958ee72b552c19095c6dc815e8f01a79f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10699fcea36bbc73715a2ca1de70733566cb75c776f7a3b3d3c1873f6d09bff9 +size 31488 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06023db94aecd23d34b0b8279ee7161ab1a54137 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4121ea9c296561a26400f64d74058b994ddecbb8413884828baaaa0168aa2706 +size 28743 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecdded5f35e05f4d3165e9510f1546b75a1ffd0e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e494f57ee568d627a6587e750d6bd5c059774c8cb0b62aa7a148809da0e642aa +size 74646 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8cbcbf83371abf5c405d7636900976522ba02a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa3ee79d0600e4d614f8d0bd9f2ed6fd3ae4a380637fb0db69a0bbfb9f3f48d +size 30095 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e8373ee4a30ee7896bd4c9d71ea8670dd5981ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048e402163b65be5869a6ed795eb1ee0c270336dba879a259f65d25cce855e04 +size 19124 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..298a987834089c3c5e622bdb390af1ddb57ccce5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448ba7261550605b064be6a3612fd1310edb0c9767659109e5d613a49fad8428 +size 87944 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6afb3fd56cfe2d32cc954b791cb31c5f46fbf54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270606a2c28212d67452e56f81bd520880ae5fad8cc8f8c34391b8cd6cb1a580 +size 49983 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07999e0ef3c16e09062ed65db46d24c51a9abde2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f285a042b57b9f3540607205aeb50d6596faccdf25cfa91a0568b69e4392a1e5 +size 31617 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61c9f5dba23ced6a466d3282831ccf8d2ea31d3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53cb88edba0c5f2af41af9e1efaeaf68efb75a5d444e7da6c5811d530b889605 +size 145414 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9009d0299357c365c8029aca66da2742e2cf4169 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:007dddbfeda604ec12a304811556b43684797c4fc189470475075c3f95c7fea5 +size 44546 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..196bbabc0cfebc8e0e3f7869b583137ac04bc859 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd852efbd772b51193b49a1e9b4a9b6da29a96b68ef66634e5eaf4dc32950e0f +size 54421 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05534ae99c12470e5f5147860383c5c7752adf88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd88b2080600739e39eec754fad9c50c6766d1417efedc22da08c02a2f2152e +size 92787 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a82dd3f64fa078f09cabec24d33245fa5571af10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f91081520243992ead5a20c0a5d0e783af21a64f852ab111e01579f76dd97b +size 57009 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1cd57e6e17a03ac7a15b0e670c00a7d00c75e4aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5add8eec114c29f17fdae119e812b9d0f4ef1667b18da8c9e2396083c661df5 +size 58451 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9213bbdd5e178560af3a21dd3e01825623ba875 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:045cc53545f85becc46550b54457a7a48a1a55058ca7536ea6300931e869f363 +size 42872 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc6cb32fe7066ea08585a3bbb30a6a0d66660f12 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:758acbd0f7faf603baab290c735fe4b0a8da66b0315230b0fb64d3a58ec2eebc +size 143904 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..781d94cc7c2a9fafadacd788c5545b6a705e2278 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46f04b55a14dc035f1a86a367e7d09a40506aa97b96517a31ab82ac1cf4fed8 +size 71199 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..999428f6e9ac2671f42b4b05b0ef6fcfede93b6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee71d18dee8688bd9d43c04878881a1f946233fe10b114519a95c5b4902e7da +size 161922 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f23c6ab1572ea8e8f246565d442f95c9c6895575 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f60eb6072b9b7f1ecdc03921e44f6cea4e3d608dc61b7927d1aed83d8a44e4a8 +size 209507 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e601cfc50e1be3294c03b530fef9351c6c59586 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb4d6c78ff3f85d4baab97fe7a561a5726f2a67e684433657652bba288210d8 +size 49910 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5ed21406a440a02a4033fcc29117bdd55b40050 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab40c4be73de8ef9da9025e3ec4f9c9ff5f52c016ac03a7e90092ceb870de60b +size 31517 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c0ecc23078ad7046fc98ccd5bd6b8d242be4727 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c15544b202491921bf1d3842ddf8e9cbdd2b7d5ec8c4419f35f339cac01166 +size 36053 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45039ffd376cc1a5efef60216944278131886945 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9bda153bec0d617a57a26032384dd5f0f5db20b6b74fa3710db6033a38951c +size 29461 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b766807ae9160d61cb6ae93df62644b7baa4f080 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb93a678f1fdcba3d32855f46b24299df408c0ab539de72f467420f28cd629ed +size 40348 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5a41a7b56bfcfc67f8c31bc9365dfd2b7ee4f3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0577e313b94f0f7ba29c64e8a1e3eac01384b4cb130c6dc063363970fb745551 +size 27028 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ae5d4f10765547278bb288e6c3fbdb4b9bebcaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28717c0b7569e823b523734a9d74814a99cef81427fb014ef1c1454ef2c99766 +size 21704 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1adca726ee8d4d475dbf480e35de146ad6c0e07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f84ce28a6bdbe7a7e37b323e40cbdb4bb24e1e844d75cd9a2a118e226c4cb061 +size 57373 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..336036f71140adaa7d075b06af434984b639a29b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed4682c3dc7760f74cc835067f2df2b04a3d749afeabbb4f4bc67c6626dd30a +size 22292 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e6e5e6b86dc72b380c4eae07d7e0edfaf0bfcf5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d701555a43073840f54052147f1957a4bb653809b14cb1ec7cd9aa610a15ca72 +size 169133 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca0e3105603fb1aaa95777a0d9b1b0339558b10e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893b4d01cdcb4b909efe6fafa90424831c78723c4d86069abe94de99ef8f8a4b +size 90172 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5b417061765f7847cf42e96159a780c8fd1beef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c1e17342e42ea4d98dabce5d77c940154f2083e54518ac2cd711d6641fe3e38 +size 154124 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..008fac50908488e9deb45db9babcbe1e8efe667c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f55b23b89114f7bc497a472eaa97c4d04c1fc03be701804a67a52261bc610ef5 +size 79377 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2534b6489383060ef842b59bf5e8eb5b5417d14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d69f9b3141084e0a5b339d61c4164783f324e862e09b7b988901cac217839bb9 +size 74332 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f70d0ed363b27290e97b2cbb29edd1f84bda4f57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e2de3a1306c56d98be710ef00f73dfcc4582c9acf4e42090c8ee0347e734d6c +size 82528 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ac278e45224efd0441ac250af25e180e74c0a0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5adfacb1dec5d0984eaef5e0802f29aa02c09806b4c947ef2449589b99db1196 +size 89933 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0712fdf4d572cfb841b1abef6cb6926a933e556 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f5594074e8d06f228cffd7027000afccf5677fa334ce30896175eac7526dfca +size 1004740 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c72c61f023f0ff08f927fc4044aeb6eaddcf5a52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9761d25e9c3bb3f0aff79d4de4de820d17b9d74e9ebe0ee2ff26fcb5a9a20eec +size 133005 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b9874127af46e3411a5bc9007d3eaf868524bc6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69751e9be635d24f0d9b7b31309a67dd7de495207f690da5d58c83e99ab270a8 +size 182988 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf023e595c43f137c79bb8eb2e289e67eac5e6da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16b21fa82363e274b5db5cdfd15a408368bcf6219b92d2b5489a8ebac3614e3 +size 27519 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bb1cecfc1092e3d2307ce7be69712fa8263c90e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193b6e94b9857ae89eeb4f790d3b7be009fbb165e357353464ba9334d5c4e880 +size 120526 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53ba91cbc588bed238b3b8c79fa37e273cf04569 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeff5a56a4495ae6779fb963baae38bace7777e5705a1eaee6bb2eebddc6cd22 +size 57981 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..907749cad7cc1e8f0ec89d46bbf8c2caa6160d17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:334e73a16f4ca820b916a3b3057f4dce0a9bed99489996f565edcaac02a91012 +size 25425 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e826f27b4d4e70620a79f52be2403209a550b2e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2fc6a2158c7ab0ed2597ffb79567a28d941a48c53097374fdcf36252372321 +size 39452 diff --git a/eval-results/mmlu/0/ckpt_267/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_267/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..565a5e87d1e3797af713170604171f46d89a1f4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2fbf942a2b66fe73c3168e2115a76426e7ea9d05e748f7f3c035986b08ff417 +size 32881 diff --git a/eval-results/mmlu/0/ckpt_267/results.json.tar.gz b/eval-results/mmlu/0/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07b1b69ad0250dcf9f17afe293a6ec5933d0004c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297c623cef8c32c70c8ce7c5ce02f713ab9b9cf78f294f53df4bf900400a8211 +size 7621 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5826fef3637d83f8af4f5b078dc7be2cc6b1fbe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150dbccfc84df48ae1bf4ccdc08092f3c2329180192757df18b0f11c1424ab6a +size 17044 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..89fbe33b7dc5f7f80942896e30253b7a3025a39f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42f5bcc9d3f4f37b45e68ea0f134ccc0788aae2b9840d7a94e18ffe0872ff7b3 +size 29826 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fee4fc4faa15ee73132c4c9ba064c731773fb128 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baf52d8111dbf709e0c1ed3d9fe2d7d36a61f9e8a97ca5158d140cfa87410a03 +size 39868 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44a6b57f06a604a00baf49befe983fce30f52478 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b730652846e30f8992952768ae0f11f60e16af2ba7301c96480a96359d16a5 +size 26744 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcf3d9a36352a77797601fb755fb4ebc02414acf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199ba68f99dbe14c1b4035069d52710b39fea04cb1049cf6417f266b06f0517c +size 61242 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2447d862cf77a14f058cc88ae9620a07f686c84f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68422a1f27911afd737f178340336dbedeeaa455c5abc714a80cd31a1413a6e +size 40413 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f55c6e5d87173263d393ee06f3ae9e903788786f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dbd3932bc16be80bd2e202124ba5732580d0a9b5fcec02db2eef0c36ed338b7 +size 23827 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87973a928827f03295e451065abe114e71475a79 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03bf729358cdbad2bbd1a5c81c8539e43c3ca16ea1299ca9886f8e7d86b3e06c +size 31104 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..558b041ba92560ae2717d10aba604985a81e6a08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7d3b8b0db57c35bd9edeeeddb014efbb7cbbe14efc5b9881c89c7acf4ce981b +size 22908 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aed41bf0500cbf6a1b2eb62e7548bac5d10190e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a0ba03b0d0f7d9a43d06ae6434020a7a52da31b054984d0dfd58694ccb774cc +size 60937 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1adf52bd1e58eb976de02081fad56d046ee055b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee355a179cc69902150dd6e4f97a99f7ecb748ee5a44af2f47e3309b74cd7a79 +size 25727 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b4c15114446532b5ac4633cc4ae07a4de2d82e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b156c9eaa3be8fc743f6d0833f8d52d7f57e7669486f77608a907393bce3a375 +size 25784 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3be6b5e0289a7a8287037871e7e8f78f05d3282e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6211741b45acf3556109df3ff685cc2319cb6fad6a646b5ec7f189dfbf9399cb +size 46473 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6de8d32ec4b746347a3fb1a32c7f6f89c77d41e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883ed77b2483c6d042f5fe5b20e444b8e37de3c0bcef3bd1cff71cd282df6241 +size 31505 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99eefd7aba86f5413f7be899acd4f78692f1c332 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07f1705916500e3ccbe506f096a6f7c5bbe76aa3810b8c1cc22173d4f93555d3 +size 28748 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a7ef4d0ec020573b99887bf8d6997a53af26668 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a635dff5584c9a6fe87d1b8133ee2d9c76febc6d56d4d2315483d9927acb798 +size 74618 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43414dde049dc3b2fdfe78f49f4478b739d79c52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5a230503e3b50018ad158f4927fc53b479dee031eebe369a9a752d73a72e2c +size 30095 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e92fc39a49aad965a961da96447abea737ab0d29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fd47615db4443a638f85ac29982846ca82ee787c10ef94d053369705342bf67 +size 19118 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54ed80f649b8eba558bbd8d6f82cf22e7bff6752 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ecdd5aa598673d849f3c110bbba6813564a8273e03270955e7d0c5361eb0c3 +size 88034 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb17291209e62114abf4325a0d67e21b476074c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d93e8f1a884a4f7cb17b6d471f10344d5ec5fd67b4b49350c92b487e0ccb9b2 +size 50025 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72b69bc2d989d9ca2c401106642eae876a0223b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:407095728a8f4a650d1f4e1ab629ce1451b17752b5ccd0d27ee085c7c29fa476 +size 31606 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0eef2ded1a78a4d8166c74cc36982f219b0697d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c561311f62994b24957f4b68c2a780ae5c7c3c2f3e14e3b6b5c2d8649f2519 +size 145375 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3c9d0fbd4bfda7c70ea4e03e4373ab1b0648139 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f70a05e945211ac0277363157f069a061d506c2fbe2362c2df574693bb76d21 +size 44502 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb887fe82d054b15ad61a4ed9ca530f01921cc1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5897e0dfdaded6f9d16d57cdc58471ff7ad138d078f8b8207013c51b77cf0a +size 54342 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8aa0a58d9aca68fbeae75761e3228ff2be7c0037 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfd090be0ef35599d34aec7e905974680e738903964560178cccff3d3595d8f +size 92786 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b8c1281e30f5dc324a1a5dea27efac918479c29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4c8a6b30fc8cdb55d0cb4c95911ac49e6024dd7314e83899230bc97f4d7eab +size 57063 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d39afedc3457be57083d7d37c9150340c1c7454e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:014fd8da5828fb6729378d17fa1d31fb8285740219b14dd91fcbfcb4ccd372e6 +size 58503 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..488bddfaea7a692988cd1300066b66e4c4c09c54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e633c6c195a57dd439b5892dee7e0d5d7f4f9b03c745c3bbaa146e45c51bd0 +size 42864 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f73c62054bad7f4d1dedb807fed9fce78edca5e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1130931c0bd709c3497f71d71ce5b5187e8fd6875cea977fcd8b344778e80d86 +size 143936 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd59eb8262fc8768a3ef2b366eda52ff57778e76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881cf12dabda639e19dddd31154ffb92215d20ed01c84f34b38d7f8ff146d136 +size 71226 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..026b5cfdecfedd415d68b308715b3f264273ac24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcbb8196c48fbf5a7cf96e54559f83a33e16ed6409eff053692be45046f9e7be +size 161976 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ed8361b7d2ef5f3687d704f4733fd3b084b1157 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35aac91bfbf174cc9c23b4edd8871fdcc15c965ee69e4b12af19636a5435689e +size 209455 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17b568b10cf4e2a239eea970d1e58f2357a579d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51dcb83a2ebf3a4df45dc331648653936c40be5cf12d936795174ff4793dcf52 +size 49914 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8b2a42a4b05c65e8f3535a0a8f287a5f84d15ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2439a9f7828255e3867365685e4503692f0e953696797600c17e557a418d2fa2 +size 31524 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a437c1e575793eefe4da7dd119433d75037d8649 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:413c6e08dc7869d85b8bd877acbb73cc606f541bc861ca055d106f12238dc47c +size 36073 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c143e26b1e35fb03edaa295f32c7843cd441a457 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b851e0e6f368aaf2554beaeb709cc26b38491611406308ea9cb351824a94ead5 +size 29484 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3c0b963252c164c47dc7fe56cdf5beb93ebbd01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f11e517cf46992bae4536a8e6ce5b4eb71004ade7319e7cad0641314d6203c +size 40348 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..932be57d55a4fc250c59eeef9e32fc1cb358ff5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc76311fc9649d62eb21d419c78eae408180c6f2061bcea2aa8edcedbce51bf +size 27009 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6f729237b0e6047b0d7230f41d98a43aa5f9e65 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a12f9b253a7a311b166f299d6d7a786d2cd8c8f58d5773bdd5006d883ac611 +size 21706 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43972679d06f37557e41140914a37f8434e400f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9992fac92a5b052abe40d8bfbc2e201e2eb75f9c7e953ff5e81c07adb4d0a9af +size 57409 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..036515b4f0e4edebb0d2c9481015b4b140e688b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7999513c9812ae867e7ab5781c0d6738c7079b0aa164fa35870b0a33d912198b +size 22291 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2491771c665e14c2fe69b4a90c7391208a0de640 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:167e49a370f41524a44e9686f5561bea7ea9627f60e11485ed3f094775d3fa13 +size 169183 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1178e699ac471f7225a1b10e66a7ea40affc0e4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4ff27e761f16c87791fc8b1afbcb3e96eff457672be9c4dcabf76c7a55687d +size 90220 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b8b1aa05ae99b08680f6b656ea2bb9539e6a4ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d2894513da84d44258889752d2bba83694db9999a47dbaf09b08fda1dda3c1a +size 154094 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d69bd16e8dccded6db955883858476376a53a1b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c68ff8f58bc202f1baa263ca0d9300d6da6c4609ba5b7a9ac9c3b6ff685b4fc +size 79535 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1bbd44b98026ba5d9fe6507ae1e2318dc765131f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8233cc82352bc3fb94351b4aa837b95305633067056c2d9fcedda1fc55dd6a0d +size 74359 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c9be5b0869f5a64a2fe613447c6f1601a629be7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9ae92df573b7a0cbeb6def0dc4fb07b22ed7af64f67f177df859183d4876fe +size 82480 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6d22ef0ed601bb730b09dc383d0f1d8398babd0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf12bb9e5f3f18a914ed85dbc754233c4512899fef35b0495fe295649abc7c98 +size 89833 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1998068d0cd8c2aa414be9afd84b80cd3f2f8e12 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f622ae18d2eb35d640e6819e31b2a15cb06f4d6d82044e78648d13bc6f4e4fd +size 1004834 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..201689af7880bcd4f99495e40f0d7a1975d2b25e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4ec6ae9dd7b7c86926a1581e3f404321cb547182ec53a037b720a7892f2d2f +size 132974 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0734eb26d67fc172572bb682a351fe56720ef4d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:939827273ba43d70a64f7d154a1ad4d2728663516c3e1581f4e283864898a373 +size 183071 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..edb6d903db1ce5bf4093b26f7f864948736b7318 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63356c1e5f5dbdf96801150f867e59bfe75f674601596fdb69417f941b2d2642 +size 27528 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68c3723e3c37db0730904a6e2ee20b2a422356d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f69a90c1a47d3ee2adeda62cccc1b078d9f39600ab58973f63c4a31e6844af6 +size 120469 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e224db4fedf436bfff09390a53500b49b9671f5e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13cf6253b9e4e30a965556599f77b441f98cc268e9b802af029131f213dd8a06 +size 57991 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8434155a59b999aabc695fbb4eca959fa40cf68c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9531dd8d89b8b4651a843e3ca9d4e520eaec11200ff482ad5553bf711370207f +size 25402 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bd5ca2d853a2eac792808a560c1abe0c71224d3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b883929d6e3e89084a57eaf380931a89dbae565b06b16763d2b74b9ac5464ab1 +size 39450 diff --git a/eval-results/mmlu/0/ckpt_270/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_270/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0db68b907e5d397370917a1cef8579ccd2f8acb7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624c2794e7f49bcada3ad9a35306061ff976a0441ccbf9a784169d16d3d6cee2 +size 32910 diff --git a/eval-results/mmlu/0/ckpt_270/results.json.tar.gz b/eval-results/mmlu/0/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e29b558d17c6988a2febb38e8eea42a35a27cc19 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf2f80157459cb4bf701af2d74049c59f02b6e322026c47803a22ad4a7e9b49b +size 7621 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a002e8dbd36aab1d3343aabfe27093057cf9f195 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4221f30e6d329dd877cc88e9528290bb83d3170aafedc65c74184c4838de3473 +size 17041 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaee723c6d4e10e6189cebf9069a0d4386d097f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:794464a748c06dce49b16b89f36636eb40667c83395e8eccee03fd9efde65a44 +size 29835 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c77d15b7a961e06fabc3a22c19528efcd028cbb8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c232d595ff6b679685d5d1a81a32d8ff429510b25cad3e9717451dd1cbd4d5 +size 39814 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd5c3d98c7cdcf3066fb5c070a59d729fa141693 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135537af99f1d15f25204ec1f3d5f940b247c6b4bd51420067585a51d8d6d237 +size 26743 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2c7586b96b12c56af189447d4de3e4a2eb5c35d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15b06e34a5217c48275ce230847702a0f0ca850c644b3ac87767c34dc707f33a +size 61203 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cb57dcd3afc8ed220ba2601b407ca8125d28f1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a944e300c06b9e824c5bef885652136136e603c0650aafd09882fb2e40066f +size 40379 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ef93644d0b958009e6ced181bc3fc070fdf9255 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a904d294acf9d4987967b84e395d8449aaf7b770d29481fb0660336298998ce5 +size 23800 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20e6b24a03b51fc3ff17bd2739b81d348b0dd87f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a83eb74010d136392ee140ce42fa924e7ffcfb8e40c8654b322e59c51d45477 +size 31074 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fe5e9c8aab2c38ea1dd83d3a3443810a8968a10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0e52e66f2816308a7370171b15ed4060b8adceb5c3ddd1db40a2deff956b8e +size 22929 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62b2bf2c4339aed47cfae2f56aee343b25e0066c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f9ea51cddd2c231d35af68dba135b303f005f50863ef5161133844e34e42d4 +size 60897 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2af28e06b99e31587f24b6e2707469f6dc3c10f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b12e3153ed6c0d18db0ac174d5d2345cbe2470426259cdf0ec801e7d6bcfb05 +size 25716 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ea3d8b0f6c20fc65076f13aeb35b147cce326ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95d89cea4fdf893e333fee50f3b685ce65faeb7d845a9b03296d9ad01c33b107 +size 25781 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a622cf33ab9e2429f9dd448fabfe9c71747a259 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ddf7dbe650c2333318e2d1158ac8dfc4d4f114b7e10ead84e9e939b76a773f +size 46483 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60d75455e260b5342988f885f1d21ef6e4589b75 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a51f26a8c113b8c6e2cdf13c404d639544c117cc9858e18b2cbe63620cd119 +size 31475 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..808b8ff6fd2e406c7683eb73f199655cf8d2ab17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79d19362ca3dd4f8be00e473c98ecd2bd3b08603b545c7d55742267eec71aba +size 28746 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c1d292179832aa0d00ec0545da5db81f5a87f17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ce95e99f0a966061f564c1d9674b830f9bf6b78dbe1a3cad00d880f63e943b +size 74643 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f89d768848d8e6476e09d1bdff784302b2901f72 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae7428014a61009935ee91c2718f91a41e6c4230666ba64fd689c8ff71973be6 +size 30090 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..651b7ba74c1ae38dc26b8362a8ecd71e2e3ca072 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e45f12b4cbff60fb4b49c74257a0f27f57f8d67911e51cfedca7d54425e2a8a +size 19081 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b357978b4fb05c484e9774e3ce5506bb3e087134 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6cec17a838dce7dcfaccdff293346b76ed4129ef290686ae7d57a2cc3296be2 +size 88017 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01a7cac8b066841ad7805a50509deef1a47206c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b84483e77b5e932d5387e489ae091995af74313e2c99f741f7d164d53aa57987 +size 50025 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5472caf76b258974125f1be0175293af4b0b877 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a06c12631411b8e9f4434a67d242ce5a093b02b70d707b2547d665f66a8c7fd5 +size 31618 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a293348847b2732138ade4ee71b4d1131b58dfb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f9206d12e72131a7deee8682be487dc54a1274b93372ad43ef3781eef793b4 +size 145390 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4c9a80f6df078d77d58a93314cbc695d5164503 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a403ff4cc673a35b0de12909140cac7227491b0e1c5d1e62897e19f74cec200 +size 44525 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a09f327d2ebf9dfc37e9c5cb238a7bbdd1d3b51c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6469a1dcb57c3abb70fdf57fdad0551e3913b9b4761b07437c20ab01b95a480f +size 54363 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c6c468aa0f307d8635db78c6fb7e7f83486c54a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47b15f2952371de7541595bf891c26c55e960b0ceb08d8cc52f3c3b79f651131 +size 92749 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8016464ace9798d130bbbffc8c49682badea9afb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44de0903b368f45ceb7db8f8fd223be9d34708e8f14719c6659c6d4c7ab2f1ea +size 57061 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61c478da58bec94501d22fea96fcfae0d0042789 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9abaeb405e7dd2953115db088166cb0d5a387f0e67af8087c2f3de10d8a1a128 +size 58444 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..172416806e77a488232fadc4b4e2d8fd332d7d2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a182fc7c428cb8a4ea63adaaf3fd51ee1994cd46d301dc05da728dd4adc0cb +size 42876 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0102cbbcb793e723e2c8e85f2b39a0b29046c8b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e82069976d7ced206a94eaa4c8dfd7784ffe16e87b4c55b78c848da7b53a6e9e +size 143953 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20bb93107f46525feea4bbc5fb3cbac833699e3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d7891fcf260a70d67f89a9b137d303f1b8794a1382588de50517a1291f772e +size 71176 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..847e4158fb59132b2c230c819e8154a31ad0ac77 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e6ef38229883f51a9ed63b08dedaafff3b149f67c9a858966fffd4669c71f1 +size 162000 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9a844eb3462ffc8effcfe25d91c097bc0cc3bbb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dbdec1e9899edef08fbd523af6f91f7706426501dd8208bc55b4f59dd7a8942 +size 209532 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e14e0cfe4ef31884f33cae6cfbc42f28a53eed7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3380d3fdf0a0c1e301b4471cda5f210f492c84c6c65790d9344b0722b105980 +size 49951 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7ec0be80cb3082f30b24adc1b2a8a5347973e16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e535082961c781b58f4d8397654885a4925262db108c0ac041eaf1a30f739432 +size 31517 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54fafb22f2a10b147c47ff5bd401dec44d268288 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e7a861af5b76ae06bdf42e073df10bbf16f375bbdc166ca9808324f0f8353db +size 36066 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a3909a1b8c7db51d6df73c1049eaf8b182a8b1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce6a07cc87eee69bf6527b23e0fcaebe8b83e83a9bd244ed00bd3275e7e038e0 +size 29429 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5e2b11b399257e76f1807fed5eb55b68d9c58cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e922628624de4e1eb69b0e3a2c71060db7e868a2f5ac10135a1e3bd8960667b8 +size 40312 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fff1c4bec34814441cdbfe36f1e81c4727ccb983 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2030b4afd76234689a6fb0bbd2200f35b2033a172665fbe844b7b6b6a6a15403 +size 27038 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e4324ab90b43ede70f827ab6025d13d640ad078 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f2fddbf98ec9993ec15d72d53d0eb9e4077b2c866c56318d2cca39eedc88f7 +size 21697 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..88f6eeae4c69effed0f0ea9800574dfdd8da622d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1fdfeeabd14de20087078e6dde078b0cb84f62d5f86bdcddec62690488beef7 +size 57396 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67a7e60a9c8781d5039ecdc517003905da9b9427 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c9820af6ff5890c63e3063fb8b66f659300d5a1cd9170f1039a7cf1913a5f8a +size 22272 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c2199c1e59c3aebeef205731186179b96f8634e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeeede51636f04b9f6b474a5d525a936c804d585563a0f5d6274288c058eb481 +size 169165 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80025210b1a6443083cb54ebec512302331dbda4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3aa34d89d8e2be6d79fa9b399173b940660d7919b98e4af1d4d49c78ab9d06 +size 90256 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a3b80da8f2f8d6f21855aa3745c4e31a9816f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3311013d2d9409bbc50957931ea024ae2d1eb2d2546f02ddb878ab1b10c3712 +size 153825 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb985aa970542c1518c815ea8c4fe50568e794c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:660215a085ebb35a1ab01182271489c8b744501e61904c8bc78d93ffd795a5e9 +size 79450 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a44cb45e421fd99de52cd5cc28323c6378238014 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5db6bf47143a2e1993ff3b172f9efa85ddce2a8ddfa9fbfed169026b420ec2c +size 74321 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df885f8b9278971451e9036442ba1804659699a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f69482ec7e5d41259aeb5a6a623b70384cfc724c9e859c4743c25bf45d48b1b +size 82553 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5e4f62762259e6b6a79f9c6a7e17e4c7a637097 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6331cc28977d5a0c526c47dc0b5b92c93b6278e7f57fce25e867e42756307d02 +size 89879 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff785ff8fff5dd5e132174b9d3fd5fd79fd2f502 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa454b774cc773a185d836d26e956df7d99dfbe81d35582d8cee3b6f0aa2db20 +size 1004873 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a46cfb1556481244c90da1d4e78ae1f7b02c597 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e041bd8d0d286f63315eaea763bbf81e4c04dd62192fe45270daf7e44af9c4bf +size 133042 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fe650ac096b8684ed08d669abd3ab6ee5917f31 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fbfe951873a575765611586e97f5eac1ed3947b81a43ce0ca8039a50639fe29 +size 183063 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffbfd9a73151ee2d97fa8337fbe29ca434f599f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:330a962ec3ac0cf8841af0a4c1d2bb477976f5fc7ef6c06f3670d82bda71cb24 +size 27535 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a195b83a1dd503587ad6a828afd23c5257111164 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3822d4035997236673f9fe8408ebfe1b9726aa6d726c4eff1e1a8e6c0c4a775 +size 120472 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d44f3c4ef86ecd58db159a9db3e704a8f61e8627 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ee7c6287b9f9d8708547b852751ccecc8860f86f206712325b94795eee22f3 +size 58008 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66866eea3d10bc21a87568150d7c5bb6aa8f124c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b165a1098b68734d7626832871231a4e356fa18f6ce1959960b9a51efbeb59 +size 25435 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58cf3e0b3c87269ed89851f9c030585cc0e138f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0063e629f4c73245194d0bf3ff50f9520ea32ba96429e90d7531506c3e15663f +size 39430 diff --git a/eval-results/mmlu/0/ckpt_273/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_273/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c1f96b65616e94ccfcb53881ba48a4518fd8352 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2b2b011893b2364b3e8bb781daf7d32e79e7f68de3699bd3cf4d07fc405813 +size 32945 diff --git a/eval-results/mmlu/0/ckpt_273/results.json.tar.gz b/eval-results/mmlu/0/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb4e75980217bd92653b422d3a2f70afcf126f3d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8abb655391c0b2464e37dc0e15f7275f8acfaa7dbad641a020ca7ef2308f3d5 +size 7585 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..250072215e4d21da2538f9243de50c8451bf0f3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8bca71c3b4318957a7dbf7a6d56cc5dc6258eb8212658c68bafc4b48daa1b48 +size 17016 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fe1ffa7000c24b04738ab559a75ea6cbfe0b540 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7548c88dd1a93898aea408695b00ea0e61b9e1a42602020630aa9757da2b356a +size 29802 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c866ff0b4c2c01b0fea442838e8e990809a78b80 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:147745f90bff60367b5096d3359a5a7edf0aa0a9fcc8c17ff2ebbd6f94750d7d +size 39807 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4b39271710081b4204cdd80f3ea211c24e43a72 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7d8e0cfacf2b0290e8e56278177771d4ec0c6d6f0beb9b13c0f40b2925bde5 +size 26742 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3130e27af2c098190e040d811918f1cdd8db3f90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8187dc1b08730b2960dd41ea71576e3d925a1a8a39045e0d970cb7d3868f1e58 +size 61197 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da0b06973e68185d14507631c37c9c5221fd1d89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c9659778ca7d42fa2d054b766cc2b1d5197e4dd9f2d78c273e3baa8ced26e3 +size 40367 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b062cb87e2b19ca9b548f160f89923c2729763b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb6f137187d96659164e749cb1cf318bb48a9ff4d0fba2f027336fc3b645bb8b +size 23772 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f063d14cc53d49f1260bf36bec5074240c2aef1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2338be8fb8b607c4fef5c789cd80dbe07650124251d288099676f6e5397aa2a +size 31147 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f25ee54ec02aaa4f5b0e099391e1b01e2d3fd57 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:491669e92bfa61ace186290c278a1fd12dcf2a9f1cc25b3e7e31dd1906dede72 +size 22910 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67ec123e32a46665db9e97b253fc199414867ea3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bff513912ff07941fead818db5314806b4c0722c6668c0a1b44bba60ac5fc8f +size 60874 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f2e690ae07b255af2292f6dba7f4f2b8c0a3f59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc186d3ca1888991a6c1fec5a38c6fa642f54c06db04514086b8209c51cc1d53 +size 25718 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b8c70e36b578028b68d70410d499d817ed02ce1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fda714bfdda45d205d29df1476f1fa13606c1229a8ef0c856b4e00760935d07 +size 25760 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc9c36eee3e88c88460494a15548da4af0fd2abd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:296679f1f1e3cb7efba1b86548865525e2b88854a883d644ab6acc91cbddab71 +size 46437 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e7423f696281ad50b1883da6ec3d5baec61d8fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da82c3ce086c6b8898d62d43666a85f49762c1b46729d7b35d5af871180e176a +size 31478 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d08c7c4eb9652f9a5ad73ebfb1f5f99b773adba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15eee645c20a5277bb6fd51c5da50d6e358b94e2ae92e6e1dc6d2f967081a1ad +size 28760 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bc461c5322b24780ef0491740917472628830d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3ab11869f05f93d24ef6ed951cf6fdbb67c7c26d61c77d5785c072d55d8bbb +size 74640 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2aa3c1be081e6ec0322d9be9b579f3f0c488354 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc059dfbf753af010730a5a102556bdd5dfffb2a02c9d1817559e07b1a918ef1 +size 30092 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5656426a1ffce9e0693cb89ceaf9cd926609b23 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952abf7664ed80d8833b3fd69fe269256cc0e5efa41b5e92f94d0ae38763b433 +size 19121 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..408c9171cb54ffef79bd6f5d23ba12c0de4023da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbea4f8810ca4468733f3e5c0519ccf3e46f2d00c2b82051f1bbba93d0b5f740 +size 87903 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d75accb8c73341da55b9b0bf70fd4fc728552bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930f96af4387953892d447539cc0b69a906c362c36e4679d87c0898ea4c4dad8 +size 49969 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b337f37fb79a5991a83492893fe12974bee22185 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d797949911b343ab86c558a197f96e6110c0eb463dec8bcfa3f1b615469a7c82 +size 31619 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..291904df6d56d026dd09dd60eda90858f6baf80a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76a9cc22692a071f5c34c61e79c33839d77453e0246d0aac14391dc5197d4637 +size 145437 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af99c2d88d5f93a619be72fe37807a339c2a9d29 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfee7ec018afd3b26f6a90976b2c01fb2045b5c4f82a4a2f148e5ce865acbe15 +size 44524 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ef1842c2e0683346eca1c92960fb5f65413aced --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199fdfbbbb497c0c2fa8d70ea5b739ae19e5a78008f97f30bc9771720b46fc77 +size 54410 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5eb1f5651f66f330e056889fc56307f5f751014c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbff8af0f3536408e86b89d6e07fdb103a351fee0481b070707c49d6c258f45e +size 92729 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e003913a06b01e24a376b5ba59df510c010ab671 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52cbcc35add8c0429b829d58f823c1d5b1a27b93289b8993188468fd1632d732 +size 57088 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8c3b56ba8f5b65ba7be7a3bcdfa24440aa2367b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9118e45043010bdb6b3c4dc3d28c1d276b209695ece70ebd308fbbd145f405c +size 58426 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fc4258f491231505cccd6881904f1f0a8ee5a3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8494cb33dcc3ae3b48e46995a04d25ee1d14942d183b735fa7942d780941ca31 +size 42847 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a39c9429154456d7ecc65600789ebd5836da402 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5853391f5b162e77c31602ddd7a5bf1ad1793635557c35f3a0143eac96b5ef8 +size 143732 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7e5e95fce4de18f7bbd12b94a9fd7f7bb0a8ab9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5153ef1695d32bc64cdc5648fb47a91924671bf8b8f46061da7f8044987fe524 +size 71143 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdb35bf14c9ed8ea9fc23520dc1e762b489a3be6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4341774c37d82a9dd695920fab730568a6ec5fa49753dfec69e63b9b19c9b7 +size 161949 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e59dbb7b5a18945d4f6dbcb4d6028304d38933e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58a041ce23ff7ded93358d8bb09afd514c6ac5c6d41a4a1ebea2e930ee0e9371 +size 209371 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f22df8c5d796db16e1436d3e7b167e56c7cb3b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13471a5d53fa8488f837bbe88676cc93bee08a932e2666a43aafe1da5af2c1f8 +size 49872 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e771c25c7c3b3fc91000627e5fb83177f16f259 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5215d4ffd72e2f8448f3c50c5b206386b170e4b1d4c5d07dbd22062f034a0238 +size 31518 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33938f2a07fb3158f777c924edefca501de79f97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7174ac4ddf2fd059abe8bf2f126b923ab4b427ee65607953d602d51c8a749896 +size 36043 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0205286cc4cf664e224681ac85d2eacd07aea1f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ceb1969a0b12638a376f8378eef11b362e26c8e23b65e79bbc4f544949a3c84 +size 29468 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4315b42ec10ce9bb4beb3e2bc6033575e6e09b86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08edf8bbf6ba2d5885d2c3eaaab6e8dfb91a9d343f760884ddabee64030a4209 +size 40333 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..888ee531cbea26df7f343684c39f16fd59540bb5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275aee16004e34d55fd935c7ec0121dd5e26dc20ebc3887a7079cbf86b101fc6 +size 26983 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50a25a499df97c48ca59d967728fb8553b337dfc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00179fb1ba92f10b6ae816e1d3b6a73b61fd84aa05e85b083b22ad66738a106d +size 21663 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..023d16f6d081f022eb76801ff56a0d9e4b7c9c9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f91970dc5ebe831cad9795f2c7798f31af55ca32a5f90d3215d1701de1d1c7 +size 57399 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c72f15ac1c796d1c2ccf893633c18f3cb3f75799 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7599ca01272cd35b7327d8ce5f068902f2d55e6cd6ca56662117622f1d454868 +size 22264 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..783edb625eb682efc1cc5bc246fa5b0d0eabc95e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980c96e5e9e8164130710a3267efb906a4538555ce4f83a99b11977368149f92 +size 169151 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4ecbb803d1fbc6d48aa93abd5ad382e2391e0e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd6331480a22afd3bd08ddf07c454867c5d48fb938edcdbb16d96a23f3487188 +size 90216 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eaa704d4da209b04d728a3cebda81740dfcf3074 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa4413bc1f4fbb645dac50299ffc83d9b305d270096fe0e5f4cbec8c78f7bfb2 +size 154094 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..074e5617b1418cd6fbe0efc00fc4605f60d2abed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa0c86df1d3b9ff621629aabe930a5aa680f5d8612fa962c68cc6e84cef16b0 +size 79411 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13c06c42d5f85712ca97d26afaaa7d399dcd1558 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbac0cc3a96f973616010826bd38a803e53e5b99c4a7fb09ffe163fdc779c326 +size 74336 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ed131b68c77979b7777628ad642aed97739733b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c12a22a4568b1c697c9cc74546bb89fde56aab43413ca4dc8f686477506af8b3 +size 82508 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b43b3c6573f87e5c7e0d4e2bd3137f6e5f2ef35a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b864e0bbdf2ba649903a211601426fa91cc53eebfa36a1cbb771a967018523 +size 89861 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e672ebb63dd92c7f1027f110d68cdbbe67cede4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaf7579ab5dcaef90885ad9d1bb88e9637e5fe5c468ef6944001a5b6ad54346d +size 1004836 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c66ebcbcb172636e6c6891e14c6bcc46f0c97562 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e04d27de8f26b5a6923e06fb083ffcb92c789622daa0cad1942ca1c72f61837 +size 132990 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ebf3edff5a291faf193bbf063c40c09f7218ae7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96700cfec5e2f264a803bd82a20469bf42f903cf0faf68ed2f3b89d2f91ad9d +size 182942 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31aafd517da85d16694c9cc5d23efd713d71d216 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a732befb4b7c2652e6fe57477b68411798a10f4ae7eeed734733d61283fbedee +size 27495 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4772988cb729b9390130a0750d2da47565750a6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0aaaf15e6484dfba353fb273c4b2a2b8e04fb575f540e1571c8846282e2dd1 +size 120398 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7925324941fdfcc3a746f94055643c3741637d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4211c9d3691f44b659051d363f68eaacbe9eae5a78f4609463695c72c5db1d +size 57983 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64c8d1033ca61e38976aabb0d24206a5ff7486b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4b60e34ce6f3fb128e28e148d9cf6c3f17f75af9e12a2855d50add0b9524f8 +size 25427 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b265368d64c8e1c14073e52b855be9c8937c70e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839d708152d627a32b7fce27a8559bea77c7d1f1aa21cdcb5525e56df7dfd6b2 +size 39472 diff --git a/eval-results/mmlu/0/ckpt_276/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_276/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2c01d619a4c1e8de21e25e91c9693780af338f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2ebab3fcc47429d7240482d438dddbee3ab707ca0ab65b5d6a33176a5cad4f +size 32956 diff --git a/eval-results/mmlu/0/ckpt_276/results.json.tar.gz b/eval-results/mmlu/0/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1eadc10001be1895fb3ce8089fe07035cf42f072 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb20871924b159eedd0d2936ab7e7d2e78897aa734e173b0a6e621530fa947e8 +size 7604 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b05ea34df5fc15f9759c191d055d68f927aff45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd29a3af3a68cfcdfe418a2a4f05aa692d0e239065e9ac94948fea8d07c2763e +size 17046 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..473e563b5617c19b751e51abd5be843de555319a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea561742c5d0d5cb730dc2e81aafcd616a7b156fd4a4259859d439f4a9565ab +size 29756 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0819f8a7385f88326c62ff89fccd7cda15e5e027 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:094271ea0112475cdb10cd3a686f11b3b097935622c1f22bbf8722fbc8fd3bd6 +size 39799 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e7fe40242dccd03f2825f0b0d24285174a89aaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e0e8eb5bd3052accb88166ef5898eee957bfb160b347bfa4bef0ea1b407ad3a +size 26754 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4000c156d0d620fc5f968782265445f45f54c45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15673df0a86adb20acdd2aad7e235016ecb5ecae32b455a7be3e3e0bf753bab8 +size 61168 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d465d77ee9db8738e21b29d5345cfe9f36f5470 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77186f154a402a18a658df29f00c7d58be3bfa3eaae6d087393f6674d836e1b9 +size 40335 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15cf17197953ed2d67ed5876f4bc706285fb597d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff00218c15a932396daa327e2a477b44584b66a6c1c5ab41baa403a3002b2e48 +size 23764 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ad5c7b977813931c6338b0ad09846ebaa3156af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d323401d935d8bd8bbdf580c1cab2f7df8d597d1377ec50da358430e43ffcbc +size 31109 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3ce90a52092ef0d527f2719be8474da3497ccef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fda61788170c3acd0f0da670cce68272ddd71f3f3cb5bfa9e9a1febb6dbca293 +size 22877 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..135752b7d9a9e217544e2cd6c3c4bdaceed3da24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac48c536a6a9a94ad153e233365ecf02384270eb33bb60c5f6f117e0f057b28 +size 60902 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee7a8d8977c8c32bb5a3b7886babe9709c938641 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfdabdf32e2b53a49f6c9c23b1d99949cf21e395ce23f82da52d09ec6d6e2d9d +size 25655 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f37a875d29e77002d456d13a3e0f71d1313244fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:419cd9559b65a9ae05f0fb7d8b69b817f5bb72c4635ba15cd4be8d63d37ca33b +size 25775 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ca3bcb55da17fad095611c15809a918912ea03c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f33f9d76e1bc9e23ba53a4b1c4405da6285c1341ae93ca4a6d0eaa5236b16999 +size 46431 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae98ac6d62b8a874938b01a7cc06620f20e8d767 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df61229b4f2b898d52ca5e827b52486195059b669d2b95e01edcd0c3ca1db220 +size 31467 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df7e33d6113b23fc0bcb4c35d8fe8e6c2769527b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0770df9c0c10431677388eabf35b0b4ed06a7d8aa9df8a94204b7a241172b13 +size 28748 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5333c5074b55606e572e976bde491c877d5cd62f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dcbf696003f341fb6beeee0a23e86b46e68d967488787598c24c4cbf6bd1606 +size 74504 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f80a7b61cab3cfa0b5fd57cc3abc7999989515f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2857697540e400cde4a77e504f3ec91fa8591746c3dab97a8aa97e7b1cef1d7a +size 30081 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c463199506db13072325355e69c5f9afe0c9bbc9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542bef0f32000f71c1df004d65ab89a2946c0518b4c5bae556ce7bd42882b5ab +size 19100 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19d975c949da3d1f0927d42afd5caad7ebc46eae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544047ea7c0a2746ffadcd638e75fea2c2011b6eb9cde0d423efd40dd174a76f +size 87913 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7cd1deba9234b51ed859ad683efa3e2041d03aa9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa481409cdd8d9e570df5a002bf0a8f8f7cf96b41d25b73a73c984aa8e25320 +size 49889 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca9efb3a9c4b646f965b97c37a721fc33566aa9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a71f561ab2620b6ddaa9dd5b8afd98c3fa25944c0d06fd5e73df5357d8363fa +size 31555 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..542a364836719f30751e04281ac5f8b0bfd6e461 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb2cf0e78246622d3ca7f0655bedd361bc835423888cefcd0783d2992c1d1b2d +size 145325 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b78646ff43e4e5b1c38407d8de235c9bc61d8b40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee2de168f3825edb35f0fd397aa7c2a0b97c0e12200436b4f5abbad89ffca4d +size 44518 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aea69c7691acba21ac0a5b391e7dee5adb70352e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542937c30025caebf70eafe80f713c65f3f38cdb6792c7ea2546a26342b57dee +size 54342 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42e95374fd1169461ae4a770ebddb0cfef99f950 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38650dd096af0abde8efb3bfe56e4fccd3fa5851a2fb33a7e7f46fd9de4c06fa +size 92654 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17df8c26785392a8267103c7a7c7c8448dcc1947 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e72760cccc94fc21cc04294083f0f108fd0fe73592294258087bcf43120f52 +size 57014 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77242bc59ce72af83c1d67c6a229645cb396b958 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c796a85ca32420a5a27d474c77150130ba5dee9c788082034dfd2218fbf34cc +size 58387 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5e9ee431ee00ed19f274979212aaa5581531661 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec685b56979320f27e701530f2d7c01083a77804c8f48fc3d2c34a5c212de08a +size 42858 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..812c05d55d974e91c0d1057dc0ac3f0eaa366337 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5eb9773c88265e0329089d01f3006c6200a97932501256591eddb53feabdca +size 143795 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..575f3971ec087207de3c2deb5434c4b601ed19f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34364146a6dda5e1eeb38d9613fccd07017bc51b94fe4b64900dc3958fc51043 +size 71125 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..681790c3b32581742eae62ee0f63b8f48b4abb78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec0ec33e27a9011db43e95527bd71bdcbdb78bc19971a51fa5e191c27e2a948 +size 162010 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db2640b25326baa7d31e72d2e02d1e6619b7ec9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a6e1ff20fdfe13d9a187df20c8572cab56d786b3c2db6671265336c0c0c2ee +size 209374 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6863ac1f9cc8377bcd5ad0165ba91e888633edd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dc1dbe5f484178a45b2e887d8628df7a8f1b7efe69e263a4596badc37cfe6c9 +size 49835 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbc1c332de4cfe946fab80d8305a7db1f423d162 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecabfde47c60c74b2837098d7f688f26e8d7119dcc67c929b68a04a966eb18f1 +size 31518 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd1ef395f867d51ccf401955f6d6afe05f166d97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b75aed622cb9b52264abb23d496e77d0b9e34f9413885b4db7620e39e86eabef +size 36041 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2eafdfded79b6bcff72cb782d88c5f380cdffdb7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b393679d1575de4689edd83ee2b3f0afaf5806208675720841b324738130f14 +size 29469 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f65488b5900f1e73cd4302d53a3ce7fbbeb8d968 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45fe6b7429ef3d3ff3dfa4aeaaa75e9a8113d8c304e07fa11e0198bd936f8cb4 +size 40301 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35be60c02eabb459b7b04039aef4986b51a3e095 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb063e3b519d8399b3993f42a0aef01cd6d7d6ccd0a0d20b8b4a9d96db83b0f8 +size 26988 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d9be56ae0eeb99bdf9ab6f374e68e70d33eeab9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f84cede9521abed0ef36b2635cdf8e74c83b0fadc34556c1a69b2545fedcc2 +size 21708 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9c560726c2e177b1806a8c06f28397dbb6b05e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb84111b503bcc89659118921e3ca33de5b5ad13ef4b978a0f3158f66d72d7a1 +size 57355 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..998508fc5af71d27ee6a601033fc99312165834f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea90bac7c304e52bb1efd4dc15d5e493b13c4d8795479ab706ad0f4e057c449 +size 22242 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..765819ddd642a66baa0600f8ce7afaeb5e3adfbd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa9a89f74143025d48e31582c70e7ecd66cdc6173f184c1252fe33c90eb7b9c +size 168939 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..068f25548ec97a986237ae4ce8ba97700b1413d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a44a757b2e0b7321413a7f7a1898d0f8bcfbaf45faed748838ee4b1d67ad9fe +size 90155 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..27a76ac305d1d1ba0dc7735e80b0b212a40b3f98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73eee33164e30133d6fde88acbc0fe055df3db0dc9e5799ec9d6781c5577e8f0 +size 153920 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..774bb5be850fc1d6093a18e9fc4b203a94b9d7b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5d680acd2d233b4bb765da5e87b3765478d3e1efc415ceb61b400321cec9a4 +size 79329 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f58bcf259cf04a06a026c739cc3503b1847a7f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca161e8009f8a408c9b9ed0eccfaaec363dd47eaf62b449db69effc16d899d5a +size 74291 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39e7d26dd8f4bbfbd5094367f27b83308d88433f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d8d625cd587cf6c7dd6aa7170f4dd748491426eabdf3a3c36fd2481df6455c +size 82515 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8db460380fadd8575b06b7de13fb18fb32f32e2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdca0db7dcde7329fc12ea50641d7923c7a0cbf62726e921bf9928710a308f08 +size 89815 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f0cb007c7b19f9d96cb41b464849fd052498509 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7209576700024eaabe0f06835c5caaa66b55c7c80810823907b5d6807ed5c353 +size 1004502 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2aa111cc1eab5582eb6fe2fd401073c3ce108698 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02944598f066b046c7ea797babf4666632acaccbf1444b38edbcb486296bcddf +size 132903 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..278a40d81119bc7badca0375bb7562b48c52d054 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e1916bb7c5423cb5577b0562dcfc8b9b0edef3861876e194b557c612bc1fc2 +size 182838 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d2a8991c32d3ceb9b2c773f8f959c989594c20e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22cf60d817a10d79a9c7020d30e2fc2a0f38199f25c03ed40db9b36f1a1dbcdd +size 27515 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5943c658ff967e09807a67e8371f707dc5e44148 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79dbfc400e9fe3e6f39726186e6b6c0ab07282b570a89ce824a3063598022fce +size 120339 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05f8f45aeeca212043ad8dec2c9a12b4ac444ff4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542ddf53d18a9a2848a9d7879cffc38bc80766b9124ae0484931ebb822e97487 +size 57958 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1939d264c019bf96fda390798fed8d9307946ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f38ddce4697ed4ed27e2505253adc8bbf9dd8ffca17eb86f94a80013dd49f19a +size 25388 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02a8a0b2526f78c6ce383c2a730e2ba591f776f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee013d80e47f0690b3d6cae96fd260d5e7bb642cecde51a4032268ab2de62eb8 +size 39432 diff --git a/eval-results/mmlu/0/ckpt_279/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_279/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb7af27b6f3787ce4ab0e7993a9867af7dc8da61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47679689d14fe63a265749ac9d24fcda83afaedddf311be68571d62ebd01ad8d +size 32893 diff --git a/eval-results/mmlu/0/ckpt_279/results.json.tar.gz b/eval-results/mmlu/0/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc71a789483f96af0e7d75acac1e94d539999d27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc56fc1e4322974f9c63c8bce12232ab2838f8cc4729834f5ffaf8ea7a41b771 +size 7616 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebe3494b3010d4fc2bb4e98bd85bd83aa025b4a7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e31d1db4590914de0cda2e7cf1e54dde2de4fc40131c4d23b5f7cc1444a64c75 +size 16991 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb9a9e203743efb43846ca82f0c2ea58bd625717 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c38486085334e39374ac5f195eece6c9bd0005871b15704e1b86f536aa19349 +size 29802 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..084aa3765a136237124a858e4c0f4bb068e94cb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e389215ae5917c0b832ee27d6b731fa4ef98927f51f5023d65dff06640424602 +size 39816 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de533f12b42a50f652b11bd93b82329d05106f26 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40e90f8512e140ef88b1ab5b21646b480f874b70471930b3d8164f13a108cbf9 +size 26765 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29f9750486c94773d2a7f805391a310dad013b24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade8170ea1dc23a6c3bf0e2d16ed57bc497286fdd44d55e19cd36f60d79228e0 +size 61207 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2bcedf8dc499ab287d3cb48f357613f85502936 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e74dd5c75ae93128429d01ad01532c1b1671c8ed4b216404a70a253dc10af07 +size 40353 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3d33c3197df0b031138ecaf414043fdd1867d0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a07ebed3a004f7dcfafd3329f73d81db6f1dac1daa5ce0b5fa0d391cac9964 +size 23778 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c98d781ba2c257e6aab9653c59fd69de066558ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4470b598ba96e3bd34aa8abb90a865dfa7935b1228a62243c34e4d65a31fecb0 +size 31090 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d226750a871e7bbfd010b29164f61f9d7eaaaa5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6c57b111d83e58288f34886186dfa40a13b283484b43c98f4583403a65b2cd +size 22900 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00b627197ad38d09442fbf17569acd481febb497 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132499d7cbbb1459efc18cd58a3de369eb7c5fd7e29c9052dd032d8579387bb8 +size 60896 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f01b5ef1bdcdc2987c59b1f53b69f6fca977b0fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a14e56c7277d0028f69bfc1941bcb0336867ed2dbab88a67d9e775c9b1e4c16 +size 25670 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..769b2489de37349dfc1dd7a3b3444914377b06dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f806a954fd9319a47a5732db0efc95bb0741b27818509db20a86f04398bd1c47 +size 25774 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f14e7a413c376084572515f3ba7dcd0ccef3800d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b35a4420c81e506ceb66cefd7dd629e6fae61e6865e131948940d2a474af0c7f +size 46473 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f32e8a580fd3d8202dd0e9059af1619764b7350 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be3ba69d83a29db29df4692bffaa06b24641d7197cda74b38de0c6793c32d0b +size 31509 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8daae4482f4915b7dbea0376cf3e776bf21be3ec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3dcc8ae6873688ebc7f4b21d045f6159226200c6fdf36a5ae609c2a95503231 +size 28752 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d82d132b360b1a2db16f327872bdf848fc7d36e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923f6b813d50c874f5383f15b2d8f8accd955747b453a4269fb2cc935ec159f4 +size 74565 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a2f0f51f1c16002f9da9a55e724e7a617bb8bc5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54185ffde06d4458d462a1f9b77f7f47b9e75436c4abdeb1a255a6212db88e93 +size 30072 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9887ad6c0f2a63bea4ef979bad1fefa60aa035d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29834ee222892a4acb2b4b1d53f09d8e4d750baeed54be014d0a0645c64710ff +size 19126 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa704bfbdc4a650d1c0e46ac93dcfe389383a4e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77647efa1248823746608225851fbb72cda7dff4ede670ef7445fd540707fe0d +size 87939 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f4d4e8f8368837be180cb4c278cbe35effe9520 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876ce6a7ae60a141240bf23647bffa5bd8d0d1bdd61cfc9fdbac15ea097fd9c5 +size 49952 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a73e5f87ba8904479f28e6e64859c6b6623d0a04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a22a88c6cdc0c19d3660bdc86bfb53e9523c5b889eb6eaad961adcf0944bd80 +size 31588 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7102f6b54e9d9329856d5fec3008e6189c473c0c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6512ab4bbf957dfe6a70088d90954e21d83f35b0b469354a17c9712dc75f238d +size 145441 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70fefd064015c936aa47f14910bf88b894483082 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee37888a618b2fd0348b89a54c7ab6b45a4eb65c4802cc6f3841a8544a57ae4d +size 44519 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe313781021ac91ce05ede8716e66ac7c6ac0bdb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10b30cf5ef05f94074e058714504dc8369572eec236c9863a0ac9b79ab02aefb +size 54346 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04f1293a09a50ef7a919d3beddb07e2f73c97bb4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba97f0d8b1d334f6a850fdca7ea5187505ebcb9cccf12d2e278bef519cdaa383 +size 92731 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e6662e73b26f94ec106f8f428e5a4d7f40c6fc3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:813c09522b2732a39b7fc642ebde9a3a0ef6d2efd900800c84cf5a5adafd0de0 +size 57036 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03d8bc4b71b477e5eae3b8da5d009db56ec90f42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1c3addff23cf9cc2854a2e94b827f2148a5bf11c23fec0906e01cefa5b6ac7 +size 58412 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ab3a99c0a77ce83dfd2d27dcf10f059c0e5b470 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ebc6c557950d1a5d4abbae4afc2f372aec0542c82b0bd5843579af375adc02 +size 42829 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9657b9117d6f31244fca0f3079d48451c34704b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6caff499f51037fc27630a8d91ffcbe6983218d3bae23897ae9b93f231d6c6a9 +size 143770 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..279babb167acc959fc5298ced0a2dbd214be497a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46ee71b168c4e5aa6f5736d7ea424afdaf982a6097f747d048664f9c4eef3b8 +size 71138 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c877bcaa87f975ddadc6c5c0ec3e4d3c4fbf508 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27d82c99ae9564b6e55fdd21ed06960fd28b8242c78614052c3422288b8203f +size 161939 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2710c74a7705d4b55954937cee4e58f05c0a578 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a290505066fcdc73af9a63ff359eeccd1bd25f877aee21934dac5b79e700a0b +size 209420 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc2716cf63af0692f2fbcf18627fce36a0543a35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612dbc8655d5a35b00f6faf6e70488e91a17ef4a743e3b5a0ff36a3891ceb9e7 +size 49964 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab3ee506a6eafc97b2f9c24abd7af555d3df7c2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22b7f61842284b6ed493b6a5c8ebffbdf8a4283e833da45752eb69f4222baff +size 31507 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66feebe7695a7004b24cfefd3d5bbd0b585056e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec591e3b04ca18aff9b55defca65ebe7d0b81cee5d8b2eae7092c2dde1363a4 +size 36047 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30a38651200e8fe886d2fdadc9b91883f6e6e52c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a843418e7f9564ec1df978eb405684a17fdc4768cba8bb15eb71d49b320585c +size 29445 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96fa0dbbb18f356ac0cb2cb98374569bc2b92999 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c095abef2da7307b0a355659cbb837daecb37c1e7eb493afc1f6042adad39fb +size 40293 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a4a35875a6b783922ccc9786ebbc4b54047f6e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc3514c45976e8609f1f380c151d3537d48b9e78a397dbe4b6e8644371a10d5e +size 27019 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d3cbc1a9524675fa2b9c893eaad526b4518cd09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:630905577d40cc50996a4f2ad0b0018bef12cf5ac282414fba8153bbf690c6d7 +size 21714 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..496eb3724397f4bb55d1c5f0bb0749dc2ab6ed18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb19ba13739c0d3ccbee4b302a43a37d839e2eb25469ad014b2c0d4a9fa00b76 +size 57394 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c38482e1a5566fd5a48cb6d2f53f801ee1d5a89a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56b8f072a5b0d4b76fcfed6c421d5e26d83ebc8c4827d87e32db1f17979babde +size 22267 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d223a19d987412c3172d9e0bf604c7de50a8abc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3932bb232d32812d4f0b8bc4d2ec3bd81fded6e72bef37073aa29b9a7df94d9d +size 169065 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36f0d0765bb0616e30f7f4fb97d0f04e52c2daa6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd31837c77019be943b13a7fe73f6372afa304a65e33c51ff084604036848cd5 +size 90185 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d16eb39055667ada581a1b45bb9186f8dba858a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84818ee70d4192d89c767b83721d46409e11796f5ce7eaa9cbf60f0b383376d0 +size 154200 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb984ac2989f631434e9812ee57a0b8d454f22c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6477f9efd2664cf65ef0894170e156304b3f88626956b59a6163e72fbab6461 +size 79417 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80b5ece68ba42802d56c39709da6fe1b59b23039 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:186237a31945ecb4fd1784f0478e30a86a2754b41f8871b389ca896b954ca562 +size 74342 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5607f98537ce51faeaa1f8a570968974a29e0476 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c59209f035854fc5c5a7c6711821e52014af59756a458c05ee53391e12c25ae +size 82455 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3d8c0cadedfad2a0f3f3d8e968dbebaadbdaf3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0bcc23aa7b25629bc8f241762d2a29da13639d412e7afa882c3ffad908e03a1 +size 89906 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f593c6279c109d7dca06ee47fb8da0e653a22893 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2285f5ae9345826cfb3a173f93b9aa93e7f05847e5bb72884d5aafce8bcff1 +size 1004864 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02ca2e2f68ceb676c6074c6b27e4db65c3e33371 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b41277e823e8b5c6fa516ff5db3a203556bbd539c20ff5cd81be21a37ad07c +size 132995 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c11f0b01a7c9c09943bd6e33dd7322e375ce5630 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10ca8451eabd2816f5c60918e17a6e134b73796b5e619303a6faaecbae7a08c +size 182970 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..799ec85e65046c525267d0a4f5ff5db567f97a97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d56e663f25e091af0a1b56759e2e9dc608501c0edccdb7eec093e1d511bf95 +size 27483 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe112bbed2e67c0f3635996fbb7a33a78062f81e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51bb3e834e1c6cbf68dd3bb818e0a731dceb60be8f5b1027377212bc76db1e2f +size 120484 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..835cb9e43a6c5e1fa4a0c3e606f3dbb8bb2c128a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbaf6c590dc93d9fff8710737fde3ff91c7220b876d8b994ff9c03db18754c8 +size 57972 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..176872de4c0c9a4b666554774dc010da94c6de36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e67ddfc1575a268cae43810015ae7d6a8a4ed80c3e8c5bb53e55855a56eb118 +size 25364 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca93af9d586bddd6622f0eb0150e8a7c196e6821 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea0c88bed1f0ba55339f3cfe3e227422bdd9cec46ec390360568375a643e846 +size 39458 diff --git a/eval-results/mmlu/0/ckpt_282/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_282/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c80909ad53f7134e7f4c950e6fb16d5733396bdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6669bb0186514dc4614dc2c75f6596c4f9099a08b4e01ed587189ab441e45ba9 +size 32903 diff --git a/eval-results/mmlu/0/ckpt_282/results.json.tar.gz b/eval-results/mmlu/0/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65500ae79455a2463e1d0e3505e18d69d68a43c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448fdf92ea529b8df0ab8ad4c9a26c9dcb704f6ec81c573278cd88561b281914 +size 7635 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..daa198081ed543f0a303afe64708572bb3e9e237 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4473869895191ec54b94c04ad665d142aab770f91728ddc4d95773c447c020 +size 17020 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e93c29a138d2c1c8324a33f69f1602f16f1ae565 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066ae3fabe17e24a754d8bce57e1d887c02ecb5c16273c4f765f1638d205e666 +size 29810 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a135ab22997712bc155330b5745b445235fb3bb4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c238c6f0ca568faf59d5feae5c5ef8a63b7cad7ac5aad76cb919c55d5a7424 +size 39810 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..256ddef180ab3636c1866e005ad017eafacdecf6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61174b876ccf010c23f6e01c8dd1a8e732b4b9d2b96cd12d88b2223362ed10e6 +size 26734 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..254b11faf24554c4bd72d043333a60312f37cf5b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bbdd2b0c86a43223a4349c63586f95d142116e74cb6f817de4b588c2e472dd0 +size 61191 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d035aa05c48ba4fcdc060bc3fefdd0a4e3b206f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe1a421668a2351c7e70b52f2b1205813068d0c62f9c6192127d34904833207 +size 40370 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..563da7436b7713cb81cf76bca7b775099ce57592 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f2dbfc22fc4b42d845db87227c7dc20f8f8b7c6d1aff16fc65eaac4722a8e9 +size 23769 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ed03c87673c1bb9e03856f4d5a057f55a540786 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26792fe74dd6fc67c5b25a220133ea35d8dc03d54c2b1c21e047e588737a253 +size 31116 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eda24ef2af0744bd753508861c48c664031a9544 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d2003a8d7bef31a4902962c9c0942cf92a672ff63aa9d636ad4ba8e08e1e60a +size 22862 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcbc7d76804b164f64dbd2dd497b8618500f6826 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47d05e2b215770ed73e9060170a7aeac29d58eb48f8007acbaf1eb1c7b77a919 +size 60905 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c3027d61771370852da75ce15b3c787b4c79db6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceeb415cd54c3083ded82c6f75939fc45779e07b67c136aa4b2ae2fd5ea6ccba +size 25659 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cc19fd8bf564381d2478f2109201fc83510211a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c46f8e61329a23f056f3a560284e32f8b3ee3711e66ad091c27818f824dc3e +size 25761 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..832abf6a5ee4f3f87f3e829625c56eafc7f5ac51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968c41584c6e531ab7acaff7f02bc2453321158776ffdb61bbd85ad902a2c3f1 +size 46434 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65e2abb40a0d798443f51eec625ae93cb2316ae6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be447bcfc038d59bb7b64b8ce9b804251c8f8f3a13ff80e8a92c3123dbf0e0ec +size 31464 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e62b9a3d54c6fa147da6a76fc83aa05a3e021e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1e12ed4fece0da07e36d4bdb49b42ff6fd1f199151419c646868cdffb82052 +size 28723 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cef132af888864cc97bf240941ffbd1ce35da43c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596c00a586365a887f684d9671fe317ce4e6061db9718cb50edbadb4a1d0e5d6 +size 74508 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dae39432d1e158d92d4678910371af8d93ff8e73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb371b1b65895f1926d60ad2fb0b344b1476cab682ffdc3acc7f4347830816c +size 30117 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d53f4d9a200b9bee7dbe5beb4ff8231de1794995 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190b59bb7f37de2da694388c9591862d952fbaba9c1a9590340efa6bfc575bd0 +size 19096 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..995eda3a34098b90a6e8b56b9798fa4f6cf6b224 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6326f397498be8468fca0d405459ff64caa5a3eac83502a7194ea0a3fcc39b +size 87883 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3548884dac8ee5e3c622c7d6c2d93d5c5e1154fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4479b35324ee81371edcf0d50e4ebdec0ac464269d595ad0fa4ca8314fe1f74c +size 49933 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c08d28043912c4007e4388ab44183da24f8b107 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61299b9f37de86990e1469811a5cbd692d6273637846e351d68e14dbcf01c743 +size 31606 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12ea8e55762cf30557e438f963c91558075a7dc6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38b905607da26489889f7bc641d7ede68f8294fe4de6515027d5054722f13ec +size 145306 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..552cd35929282670f40bff5c254a49209d52e705 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e84682408dfb7e8dd006eb6f0895d1b5346ffdf1272fb37962e1319e8c97f4fb +size 44501 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f93a0ce8316a7462429289298ddf6ea513b7859c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e540613d4c0f044f73d19711c4a88912fe16d2c4878d08d2dac4ce73fbbf1d0b +size 54359 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe80971700d0a2d916b9e310a7f10fc3d14d8c45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e937218b6702fff879f754aa6e798a883029d4cddc99aef03fb8a4a238d171 +size 92745 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1226542fd1d987ceb2cafe936ee6b8361755bf9d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9a9701b7a145094b08fe0ce8ccec9092eb481bc9d3b66bbfd95fbf87ed6557 +size 56997 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69350220fbed3e1235631408150fb2e2414a9094 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57cc20d72edade01652180acbba9400cbb5302416f2039d01da8a95406668048 +size 58387 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..229584f1276909f24bfadf3072291a8d3354a29a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62702838efcee7b8856c50319ccc717acb19a8ddd6c1541d005e966e51f4c5f8 +size 42851 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10733ac6d4d3d4d27eb96045b85c1a5b257f9b13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e359d67e44f396176aa6848602a96aebb4d6da2c671355176be59a11d068bb8 +size 143715 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eaa7fa68027ff3ed19d28a9291e5d42d4a634074 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b52fcb9f9afb47feebb50a35e85290351d46ecdc476b1e564fdb8312945cf09 +size 71158 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e804572ad3f9f6ace994418bdb99e513f8422249 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:954789e6158c64b1e40ffbe9f49b50f173b611cf6ee02b86351d49e575af0196 +size 161931 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14f3a04495159f8b34b20534de59f71e03957172 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db240bcc9d7cda79c6511d5989b6c081631c0751d0dd2f1dbe090e9130dfc4b9 +size 209367 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06c9d3c7c396f7e2377d9f89b3ac38bacbed630f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:887f25d7f8931a04f8d03a3b5f093b82f0b8013af68a64c750504bb1933ddd8a +size 49877 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79bd33b79fda2260fac9596ee5440772df043f7d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cebbfd745578e01cc7134ae7a3b4c13aa06c204da6e3a0b70e1a7af5e537c93f +size 31505 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2b21b89285bfffed2fc41c9ea44989592f835d1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea60bd86032757d3858df29f55795c5be449a91daaf75e77fb3ec0d4127aadf3 +size 36046 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32dec9ff7dcd578892854de7c96a1d21dfc72e0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d46f6f508d763262f21a9ccaee8b0f881ef8c0482a6e149f2691d0bfb55113 +size 29449 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b66c06a4ec7caa1a9057450bed0476f07a1e3762 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fbb9432655de1415fc28215107fdef997771ada108f555baa6b96753ffec20d +size 40307 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e1abf2697514eba634ababf8ea261c9a728b680 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c0167329c2f5c11525011d150138ef93b134cff8c87513b6205563ed7f7a338 +size 27055 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26cf49cb8bf54a1a67a6716695f15bc22e46165c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6178b05fac53e7403af4d536c5a635c031fe76c55f4400d8f124c46825ddbb04 +size 21684 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fe5c90647a1a3c109744ed3c91180ffbe8fc25a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94cfe100825ae2599a3f4054f5616f39ab36ecc7fd216ec5b44efb29f2d35f5 +size 57369 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..909317a14f09ad587b44edea40960a91567b476b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f5706822334dc436c837d6191e0a73c24bc99cdbb40e4cc380673c1cd1fcbe +size 22246 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4054e378d454cab3fbc49f3b468c367765cf2455 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a9bf3d16c37bbb4d575a328e591affdeb0170e07948285afd1e25047dd2237 +size 169038 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f932cdeb5a178d6f7bea8550491edc3faf184dcc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19704107f3463050930a8d58e6a50270825c1d3725ab0c98517dcf4ccb0f19e0 +size 90161 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..838e3e76a67a4b9558dfbde175d63131c725b186 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c898936f18d2345834f8f39ad874e459f46c2342a1f58f3f0c9224fc9863ab7 +size 153992 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a6a8186518a40f7316d92d9d39712722df3d54f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5099df17da2fefd5d99e1fa220cf5889fd03ad1bac6aaf66d6489443b885276 +size 79419 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28175f1fdb9eb51504ebdb9f1b4337fec407ba97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde5bf38bfed264ee3c9d0401dd059b0b2fbe569745ded5c1edd82e258109a6b +size 74267 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0c376009e25477ce0f48beb5a50e5175a618991 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f99dbbd8c93a636ffdf5d448bda08cc1ec1ebee05f3a195f4cffce7bfdf777a +size 82490 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e02db17ba0a4632ca8e38839880e2435ef2c895 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380bbf792a765b0fefefc01c9b3818bbbbf0c847af337d5668bdacbcab4b00e6 +size 89883 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3af1857d3bf531a75f62cd3f0a5547d77cf4569c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebbeb21c7e8f650cd330ec41e81d47d87f278b2a90cae1a60d1ca4f77db8ba34 +size 1004795 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7003ecd45fdf2442ae1c560c3925625085e5b034 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4199d16d309337121e5f0133775397af4aec1ec0b694c3438b8c7d848006fa1f +size 133009 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13879d368f30675296108850ab80262edecda4dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7797bddd94b39b24e904147fa3c13de40957439ba71e17faa83fbfe4ee83a4ec +size 182948 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..676acae8cb71fa6d53dc87efb8ac5ae0458b3dda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be78103f86906b69968a6891c2394beb5622d671af9582551a478caaf01d03f +size 27499 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3af380887a49169110ad14fbc31565e52a294835 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f60d225ac95b9adfcbfd47512065b85b8c2c2e00faa99e8110ef47d17b759c6e +size 120404 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea46359d22c6ac608cbad1896a9ea5c928189566 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:075f53a0349582f573a0512bc4d44bcf3d2a467138f136c3fdf50b480b9f86cb +size 57934 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f4cafcc160f9f878b85598c842581bd8dfbc7e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a039d7e060ab3b1eaa6dc0a90509f5bd44750c1b4d5a0251d6f8298319488e13 +size 25404 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3c9dd714cbd6d9a99f10dc2ca723fc24237c70e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5299415e3af33ff2e39f2ffa6107540cff0841dab8f045d6b4eca3da62c5c0 +size 39469 diff --git a/eval-results/mmlu/0/ckpt_285/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_285/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95d2d979dd5a980af518baaaabbee9a3b9075c54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c63f3f560ef66bf5d2a470f9cd28cd3bb384b1092726a0b39e5a36f595a403d4 +size 32905 diff --git a/eval-results/mmlu/0/ckpt_285/results.json.tar.gz b/eval-results/mmlu/0/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a778b0adf47754cbb34a28fe166f26b92d0009ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8305b2e6a7028541e3e358797be192b9c9de2fdcdd10bfaacf4c0d1cdc7d807d +size 7601 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1054bb8598fb6d558946244979282cb553cb7ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a651c7b03bc098ae05397c3baa5234300fe5a140ef0275911e428f311da639f5 +size 17029 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8c88c2fafa91c4bc441e6a3c8bffbe296d46b0e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f1c231b17d6b31cad84a32490bd487d8346c905089aa3bffaff470f08e95ac7 +size 29833 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a2372eef0c06a42e0c761a3d0087bb74c746f9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ce6b53010b01c8d418fcc34a17e5f6194e10475921c26392953170687b5b907 +size 39813 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f37c25cb8336f08f529acc7b15f53511d7907f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef36c594aa71461a7a1d9251bd8d65a0584d32283e859a63ebd1a73faeac20d +size 26772 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5315166e99bfa75d2d8ebc151ec422a10a07f842 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103f95f3f68a95d52f5aaab963d21dbe0553aaa372fe6d7771fb64847ccd2a40 +size 61258 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..317a61f42b328728b612de856f388ba2ede06343 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15735073d211a85223eabd172d553f0d42f44385533c6c44c978d0bd3c2c591a +size 40406 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0353243431a780e354508c10692a784f553d1aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd852ccbe4d3e2f8cf0dc5e96d5b8bb19893c76c65e25565ab0f8f33fa019b78 +size 23774 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b2b85c5469014bb5e742d6f299eaf03323cbaec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94974753dbb448aa177fb9da3ebcb5f36d6678d2142b4d65a1945d424fe96782 +size 31107 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb88e879b6031e487438ff4516c0b97e235e952f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c725ff8dedef143770a3e3424b86aa5ebcb2faac721cfed974894f0cf1d5cece +size 22910 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ad8ca20cd5c48054cf8b47b56b87a59d19f05cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237d72f32a2546b40a64dd7779696959c26dfa1358fcabbcf3ea396b218ceab1 +size 60918 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3042fcd98d5d49a4c65feaa5ddcbba7df02805d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1684d4fa4b31e973c96f222cdadde04bb40eed24968d858e1e275cb47b6cd6ea +size 25725 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..679b07db3ce19a8f7708ed8a2f3958b0a9383b26 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76a10f1a3376c353bbe425484b0d7e9481168858257daf5b92ca3d04c57710c +size 25764 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa1080de0d55167c06169c6a85ec69b3edc337f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2140ceeb33732c378b750e013900dc639c0f32250dbd0925c4a3b6b219d4ee5a +size 46492 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29d269f7c99e510abdd203ace5a0e10b4aea16ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac97d54f831b7b9ab3f3f985d85a22330fb207363fb6e3e9994e4e344a2e017a +size 31495 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae4e49a35224c49efcc57413f35e46a23d886b7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7e56a44d7abaeddb075250824ff0e7bb8cf2b101f10809ea20abaca111864a +size 28800 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..246ce226b35d8b245ae637cf06ac48dc245c7f56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7d5f5658cf932f2935cebf21580559943a39391e3f3aa134841dea7d9f4b43 +size 74642 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b76b7b35e83709b20ea23eabb3ad39cd93ac417c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceff1ef58e24c989db3e85af54a98e527f2c8e3cc724df79f351971cba357f6e +size 30133 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5824cddb3f66c8491f7906abbef741936e5c9cfb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56d4c8782df9c9bb37f6ec6cbcd0587a72d2256624e0b3af4b37c63e19a8e782 +size 19118 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..969cb08bd5e6e4ca2ea70daea7acb16a923044ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6483c09a5f8129a82fef3e95363d5b60a7bc19d4b102d4b470f9c59c672f1303 +size 87891 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4cf19b31acd54cc0f7abcad04a8fa614ad46eed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07042a96bbff2bca7617dac3944402c6fde2f6458e3185e522258b8cca84cb6c +size 50020 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63c2d0dc1ce54271642ae6f03195a55c3d926c9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb0ff5ca1f1b2183d259d6a425aa15f0893d809ac26d9dbc415a1669f02678a1 +size 31599 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..465aab25b5cf4c17c89e8101cde9fe0d3fb078fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a9b54ce7150236cce2ed3b98917d757ee227d804ea4e93507688d719ba1801 +size 145301 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67b90f3a69d6eb05a4dc1b92fbffbc5040f822ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4945d2bdbba9568cff6e8512b2c01c5b98bc09c4123a5b8a21ad04bb414b544 +size 44555 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5da7edf2892a14d20f6919b63dffdedd768f69af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a9386a5bce11f70783432887993874a485ab357a0fea404f71182b410d9e4c +size 54363 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..742e9a548d0e9d306db8648cde7c9e553263aebd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:240dac8d4dddce4f4d6471b6a5b59e23bac3b724da4ab23d5f4e8bfe7e78b983 +size 92735 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..918675a3d4fea7c779ea42849531e629f02e7279 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2efe719d1cd546c4cec495ac9b0bd73615809f7d2af77e98cf3b3bc5254d2b91 +size 57072 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc60ecc0ba362d2465877deb1e7e87fdbae24d1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9657df4fe0b5d77814900514108aa8fb64412d20c3660938d9150262204398e2 +size 58427 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aca5e0b5f05b678a6e4d165c915b221a2f7d6af0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dcebe4d51bbae87572172f05528bc697e1e6d2a9b70a461c897d41e0aafd1a8 +size 42875 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60624edc64a4ede26b960f36788e7a57c20d1bca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7fc7018cea087ad2b9201169eef172a15ac128633155786495fcf6531b3a746 +size 143743 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e6b2123c057a6d3d895e9e726d7645242b34697 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb3e13a9c79033aa9bbd4d2aaf5eb8388cb560a4feb8bcfe3e9b90d3f506928 +size 71203 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5871273c97a17d2db76803f90e52ea3dbc368e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:582aaff5ac139e5d35aa9325fe98748dd68306028bfa642624c333f2f238d1f2 +size 161922 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..625cd9b345438e4977b9e362b8dfd3002b59ea18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2836bd0ea89fed434f9b11243e8dc1f9ffafb6863a2889daf32fe016d46f0d +size 209365 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9126df671ac6441234e8feb63dde367b88e7f533 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b1f700a9366906fd9661d45bcd39e7fb60c954e6ef07dcfcd8c8cc9ea6a76dd +size 49909 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5f099cb55d3a7ee590aaaaeefd8aaff10b5dd31 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aedb61f6925ee01ad3231de2b21ca5afe9e2f2f23ce0249d5953c883ee2f120c +size 31560 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31c111db93e6db2603e4178231408157ded06844 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee39829200868a04fdfce2dde345fb1a5567192752278a618709aa1fb56fd03 +size 36075 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5aedec074d0453cb87976865997de312db268dc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1067b13b32be8c6d5fd8272350a75425fde7c4ac8558b8ee22b0543f7697a027 +size 29503 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49a24a532dd25802d2d75ac58e87032e2e53d45c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f10aa8dbfc3e09c4988fd64b16762c6ca10829e2ae47b96cc956708114b9ceb +size 40318 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c84a5cd9655807ac9942c22c7f427e0c69d88315 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba74dd060c0f312a4c6e317812ba367fb3d3fe2ec894de08d2169e7dbfc1ca3 +size 27035 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09a4dce17835af244a5abfd003fc6fa6e789ee3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3363ec998b4aa416a9dc7cb14edfd4dc7ca874e9f9d459b3119ad7b4f3996e5b +size 21698 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eb9eff9cf45750fb19f3328392c6316f989549a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947c52ec3f87fa474edd6995c46eb72017780f5d45418a08216843e82cc00a89 +size 57321 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3be98e93d21059368d99d28aff2ae4a0866e7235 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23af93095bbac63025fe05f854533c84ac801a06ac6c3cfa979aa29e52ae724 +size 22283 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45e2d7cca3046841197da1946674619d86807bdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03cb91807f91071dc9fd6d8c6690d3b745c26d75b898df7d3f01ff0671cb453e +size 169162 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffa3a56c9409a05542b6c4e31bbde3f72278fb06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:706180c260fd336e18a84e2048dc76355f6724a78b9fee181bc2ca4dc2f4fd72 +size 90184 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4d7f79f40f922d711fa1d0509ccbd7f9b99bdf4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca482626e10f26947d0bc6a065bacde272f44414b076d1f483245f455a10beb +size 154519 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40f021690514fa37763970b9c660ed3009e7d7fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b880f0b6d871f6ccde641f157d20fa687de80a1907c370a86079485d36c774b +size 79313 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..655e903fcf25d7a4ec35a944d2ca2890ffc6558b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c883da5fb8c715d900594428ccf3e243cb82c52e678d9cac04c973866eb2ba67 +size 74349 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff681520693acbf3b27cd56305a53be638898af8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1372cfcebee409bba1ac3c7c2c27c770373a8eab9388925fc2bb4458dc94bad6 +size 82525 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c157600c7d85d47820483fa774b1307c0c07bcb0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7230e512a0fb902ec8460144b78c480e28e4f412edba3a6c255a2e7c90545826 +size 89897 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c4fd471f3570fce3c994faf58c2ab2c3b4ff158 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05483f3aff46a7c6e7f38a0fb11f5ff34d54e9eb811f8880feb7a42d4b5e0f15 +size 1005127 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..672bfba492023731ba461f5b3a2b3169af26ce07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c8619c2d113a1f1256d839e580aceaf099859a669d1cb38d8f2752ef8365a7 +size 132981 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ae45897d2797d4f86769f9f1fb5a2ea92ec85ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5010656a81677e2d63b0a3afc131c93e7a589698658238e4bd9095a93bba383e +size 182992 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59b9c2d572a43ca3657f552e989be6f9a4cdbd7d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80fb946e7aff5e5fc35c2a10445d9fead5f265b081c486a898bce68ac5fd1aba +size 27493 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da0225a1319d93a5577cbabe553dd661f2aae7cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b079edddf49f1496d35220300e0b7c32c80d56dc7842a7a7c482027cd32aa93a +size 120404 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ddf1c125f6817104982817c666270eecdb2f71a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343fe061f50d4a700566e32a454081548a9ef932421d83faadff8e3586a94810 +size 57989 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a125c35cf42f713d7af243f13defced6fe6408c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede34947c6716cdfd0d08d502a5540b9928b6b375695e3e175d5494d501f70ee +size 25400 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6c89901b71d16fdfa027750cfee4d11a95b8f2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7bb14e1805dcc98bda1adb8d6bf51bf267743722cdb119d70cf42868ac944b +size 39463 diff --git a/eval-results/mmlu/0/ckpt_288/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_288/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ac092b717fbdd14de102a65ae4239d520bc206f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14000bb8aee1b2abe3662445fd422add4b7fad14ec7e5353a306065f24b3b991 +size 32918 diff --git a/eval-results/mmlu/0/ckpt_288/results.json.tar.gz b/eval-results/mmlu/0/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42ae259e18ba58583aaf76fe0f7239301d55ddc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a823b7de5363385d738323cee574ba4927cbb6a645207c903a011c996c0dcb2f +size 7649 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8d4e7165bacc8f90b666e2193b707e80736dc3b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fca14115cb0acb32693d9de829610c9ac1200cfdf9f9908a6182189690fec584 +size 17076 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60e25990317023a6b15e977ce13e1a69587b86f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb927194cb53b38ade5c52dbec183d50d73c71c337ff1ba80843daef2ed3849 +size 29801 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3a029b36503815972206a3f9e0e1e0b883c80ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf05ada40e4d3142724d52c9a4cd9b000bafe9bea74654d2e3a6e5f623e6af24 +size 39824 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67f9b335e8dbd0f14913bac2a414680b2195de45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07645268c00c4576549e99aec8139cd7f84d30cdc113e62c9d4b468fe4989ed3 +size 26747 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e382d2eb583a6b02456d43c977576068d2c93a86 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feeac1603ff6a18a21a67f78abbc2291275236c33a2080965a1d148a80d035a2 +size 61259 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0a3d99bd098407d7bd2a8ef670555b7c10570d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0244d07cc79fff77091d8feed9a6a2ecc73beafa49e4e581e643c97aab195049 +size 40346 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b66306047af1aec990a351d5f612745d6c11157e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b8cf34d11e433717f684ffff3b7fded214eef0a3f92add8b420cea2bda38c9 +size 23828 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8ea8af56a9d12d09adfcf68f434479d5c92337d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e004e534e145970e5fc629962ba804ae41a6e4f5c39363afb6799f955c0112bc +size 31101 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb5cb5800eb8046476239f511141bf8e45080fb7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45cc1618bfa814d36b252d5f7fc800cf21bf3b0dee646616eef31f9fea7d591 +size 22923 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83f2ecb66ef31cb0b9662f8689d6a59a8b7ebcfd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3891954ac39c61acec89aa166fd3f9adff6f1b3f4cde6b92735f6aedb8bb85 +size 60941 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a51dfabdb34c7c3dd17d139cf2b4b14d69c80d4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8355cbdc0bb3d3cdc191fb2a74bd57b1b62a879da85d449eb00fc441b6f960ff +size 25755 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fb46e60d17ed76e88d199af5f5a1d09c1568a49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee3f582cfac48bb608f6bdc5faa7215c80ed9e2d01f9d8281fd045de154aff76 +size 25734 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4759604970671c1bc6321afb7ee3ec27625691c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f2cf9687a909d30b4ea531873828eda10f6e8e1da51c6892af779748dccaf00 +size 46540 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..192635e20cd4451b0b5285cda0d550f58950baf5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d3508a2421a118c2e7e986460a1cf7217851698669fb8b07db553439229ed9 +size 31495 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..991bea2cbeadf7c5be0343de2313eef2459aacb8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40cc31686697396c2373757ff2f6eca692ad2546262a78ee5a77f851c241d0bf +size 28781 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8ed9be28497b72823c4892e8a1651021c863cc4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bddfe7ce1e84af204757bf8a9529cee20d0a682f40f7d79530fccf48f74b5e +size 74700 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ec95a5acda47e10d591596fa2f4e396e29486df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3986dcad5155339eaaf551c366fecb0c90f78e809f3cb7abb9b293b5d49a0454 +size 30156 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..982ac753af1a887b7b6b89dfb234265a7b10398e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fa3814457cc0a44e2c350ef84e138580cbc96927e9b2b9cf751bbee024f5a53 +size 19109 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..844afd588970ddde4b40fe3fc8959618ab412560 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67023976075d0977d0b0d3478d117da7c386ed838913557f5b89c36815307a1d +size 87956 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1df98bdb552a6c1862bb3be50d513fbc69dce14f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4159e7fb5c7dae10de56b91d2b67b7f41b489acab2f9b5c019329296244e7624 +size 50014 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2fc39870bd33747f6d8db5c2c3808994443be66 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c973e92cf3e9960e107cc7dbdf706c3775ab6617797ddeff8e46682bc3bf8f0 +size 31617 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e48d3c7c1b1d8d084e6bb214b2d23f8b5309b645 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e75a10ef923931d1c1561ad00fab33b69715ad1c4b0d1ec4972c05b8b32815 +size 145423 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e9b2160378af22c0649bc1461943c06a5cffe00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8062c4ec90cc03afa01822b132c41372b3246dfad5088f8fd62241b949fcfc +size 44514 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31867ae81bd44dfd0a30f71157abbf2a5e80c314 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:071069c14a9e472f3abb065182d6e6627bc9e6457d65c2adef2f60100fe4df1c +size 54384 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7474ff1b45bae04a76871855445d1ed7e86bd691 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b8520c64bcf51b3abbda70108c54128148e0b6ad086f637a1fc55e98a68aabf +size 92755 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..629ac791d593ace2355f69032e54cc32c1dca27a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd99bd4d56d9cd9efff556475e34ed1adabfc0c447929939aec7fd11ea961cf0 +size 57123 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7e7d3ad61485f7d3ddf04a46ad6d0d5be7846b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbad50199bba92d268382209c42be3a55de7d06e9af2b93c3c419b95795caee8 +size 58478 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0600104de4c4ed089e1bc2601ed18680fcafb857 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce9d1a162218cd3965c21000a2eabfdbe68f8259379da24b267abd231fd98ab +size 42909 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6da84c988ae07c3158fbf25e13edff40ba41b9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272c1c4ff36d56e03c60721d29f3da7092b3bae83eec4fa5b37d5661353bf248 +size 143826 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5009e2e9d2ea60ee76735a8eeb2e8932a481b4c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e341475cd2dfcb1a13e350fc1f034e51f3adb91e19c4a097eff6464f32aa23e +size 71122 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d115320af4cc14527293c8917cf5b90f7088c2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ccdd58f86e0756710b50cf6ca12c2c92dca75dc68f9f174195284d3296ea9c4 +size 161968 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8acb046a24aee7dadaf983c19b543f9530732e92 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e057798fe50c207f319c959db4ca9632d81d9558d8bf0c129dd01e727081458 +size 209369 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..545a846212e1cc073a18f5b1bd588b01367c474a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1e468f7d46638b0b3a34183704c738cec0170d4ac64b99196fddadc2b2283a +size 49888 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ba4a8852231b3840e4a65904d9e03d27587c1f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b1d83a67f59f913da514bfb2cc84d06a58b5991be036f75ebd8bb59a0c2980 +size 31516 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd9791512dcb9d1378edf35267ee0b9b481f2a2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:290c3374f5c3aec882943a9baeee18b41a5703514b7c354d6a16385053a25acc +size 36037 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa5d6506570c9d27b6d3138e20f8692bbc5d48ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77316873c714a4868db5925a745a6740488f95c75ea06876c676b8821f1afc2b +size 29474 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d48c9ae4fe7a9855ed7248112b2834fa57514bce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:109dae4d4212a3492f6dd77ac2b1abd10142630017a6fd4bbaa1b59bc87b8bcf +size 40320 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31ca918222543beaa62c2feba5b2cc5acb0db698 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8eb2e64171c38502568150639309864d1978ce42a0a986b8cdd6fc51873b65 +size 27030 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdd6c8963286c9060c14c0cfc3e7e8bab2afc526 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ff63ce1bd45c449298a01811f5342a7106d40adf8b5f0aa5bb7b1b917f7758 +size 21694 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8d13a4387661933e81bdc138c4bd21f7ba45c2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6d0d913414776010610123443c13a04f870e76bd37f305980e5ce3cf165361d +size 57401 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c73cebd1dd03fbfca8bea31b2c806ae15e1119d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49107de09474f692cfd674c3730bfbed8c175446a17da4b99f6b6742bf04983 +size 22235 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17b9b13a7621d8cf5b664e21a44f607ec66f3cc7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b8ed6965ec63c642cb834c1ce4ef16ff874163c74e7b046d01bb2dc5fc49df +size 169076 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e5dfd7b22fdc7a0da74591a5ee481e1f28a20ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243cdda4ae46c763e6c97380d568ebac04dded85f9320c7a7821bbece6310fbd +size 90219 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ab823c3f7387dafc625a9cd40b0f4c5f32a4969 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d86a70c98ab853b307db308b1fcd6505a51a77ca725faafea4ef937c940b9a +size 154088 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e88c10e62d955740a51fb16f4eb8b27d3a794ff4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9029a7af2e8ba15653e1359bb849f6dbb0120ce78e800c0344fd40c60f36d6c6 +size 79463 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cd00dd75b0ad15710e42f00180ba358ffbd1392 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c217d0bc9bc3ea851f8681c86b909d3b569aa8a355e59e1ec7b20011a5fb1f +size 74360 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7232b70f1a2a17e6435444264fc31bc5e07e8fea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95c5f3a3a6a68473f574d36ab86307706aad7eddd9dadbd8efd8d78ae7ad19a5 +size 82488 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92addb577364c4e3d967eb8f26c6215136281140 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b567ff27432cec8c5bb85ad961262bc248e31e97b46137bcb19d21927c906aaa +size 89949 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a479cc3db5226e5854809622ebc5096c821ab8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97deafcd78859d1d9374f82a3dac951fcd3b8a0bc03ebf1cc81d8147a41c139a +size 1005193 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2753c44835cea8730a8ca57a1c6678e4571c2c7d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93e5c885f5d228b1966eea6eeb952519a14f2e73bf6ba62997dad33e1bd4104 +size 133039 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b6a9658ecc9e08d6a21d90ed763c650f75e6687 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e672f73e99d09140d06933088f74c6da61c975476c30cf82bc09100598c88b56 +size 183068 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04bcca82865aedb30872e9100c8814e2a1af0c1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79326a9250e772a89d4b9a3627017e2ac3d72b0151e221840fa4938a6ad410a +size 27526 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e37a325dbc4202e337d1145a8219814e9fe708da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0c39d76da463cbf667bdcf4c8c38e235ad4fd0d4e937127ed9f7ffce518f6e +size 120436 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e634be0a807b43d55c6d996816e73906a9a6f18f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14226b23c6522be7229843fdf289540a8844796e7d4c1617b55e36502d410e6d +size 57979 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..038f7796dcafda36c11a1125b85e83f810953d00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de98d998f35553026906cb2cfcf7ee996a05479914c1a92e07d5716486a3210c +size 25415 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d641a8916c0cfccd28deb6e81a7650c8e18399f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649e62e1a2944e523c83d67a71e9986a8d25bee097faa024980c268a9d156f5b +size 39456 diff --git a/eval-results/mmlu/0/ckpt_291/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_291/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68de1bc5e3ff334a1f7f9791823c89aeeae1441f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea5ff57387831ae637d1908b4c8ec1792155818aa2f4e1eda803a22107d6587 +size 32926 diff --git a/eval-results/mmlu/0/ckpt_291/results.json.tar.gz b/eval-results/mmlu/0/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83a5b31ffec313381341ffc3832fa736d947ce14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:009f202b10c29266bb080554fb89049e15afc4d7c2ec1d30858c7c3bbeda9cdc +size 7613 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6c9c2e72922742ef8c4cbd7697f595e4ed6e99d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1b47f73982478c1e039bde6ff7f091e1368259efe52aed42d6a6a23610228d +size 17058 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49203943755e13d1648918bb53448e9cce8510f7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5789ef05a83dfee1517eea3b74096e445c9adcced2827b57892f33581e0c6a3e +size 29829 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b427feb2564b9f672af4cd4cab589782959feff6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a0af1d8cf2aeb57ff7ca96a5ec68530543778aef30bc1df2717ba1a4c40255 +size 39822 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a4be17a31afb9e2b48e1bdcc675c930179d19ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec583f89e8b1b8896b7442e3cfd76b0ea5042cf72f5432c053bc6516b7d4cbcb +size 26759 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dedd1c6582abbea3f13232d9a3e0420a7ed30638 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:367222ced7a02717c733dca52a6b70c206be5948a2b080eea9a4ea855100b2b5 +size 61235 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d54a14ee0b2026d3831bf161c0158878064a160 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58a369c3c9a59a3f0995721ecc5522e3189dc9af54281da99c9326b910576e7e +size 40401 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25ce2ae5140e1297e945c764ef668207e24928d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00beb0df99da6674289b00b00043d6b17589c238fee155281cd6b99778cd56f6 +size 23797 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1365b2c4e1d5ee36c3d0db5de0f3affa72571ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eda3f6c6ae31a02184eb5ae9506e902bf7392e47353e4b23cd9c7878f0c509bc +size 31115 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81a5f9e14899666850f73847deccfa818f32f074 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48214e30ea31ee7209d5c85515ba673ca21a113a6dee5db3fbe06152c68061f5 +size 22923 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..886b0c76c0d31963b042b4c87a2e5c6d5a17f14d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb37649f390ca3162b0b13accc68928a3e3b23e42cddf43fdf3f9a879f8317a +size 60945 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fdda1173336158d54c1293c81c5d610e70d4cc6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa9469c0a1f840cd27768d7f9c4e3c64568ca3839faa6c03ab61bb27eb78563f +size 25738 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0f7bc1f4d67746a4b4de3a444327ddd5907c1e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c70f346d9b9379aeb89e4d7d9faf34c713f2c674ccc6af57ea724dfc1ce030 +size 25785 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d286c1f129c63a1d183172c90db90c1e1170972 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ffacd95ff205649b77c4c5186beca7a4ce39a4c90a01d5a9b745d9cbda776c +size 46492 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..785d1e08f840d1554521b0699774fe07f85c1f1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447e821fd197a1239bafe9411ec59ab0f36ea5f792ba7963f4ea147675508897 +size 31464 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea5e7d67b9874ae755772a834ded561175fd16a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06631d2ed6326334428bbaa997e76d8ea3ca50875ae047ffff9f7c6d04942ce6 +size 28803 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7f33edb824ca7340172d3082d4e756160574691 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c91358540e124c0b51d77e729ba39198b7a07de27b0898052ef5f14b74c1f32a +size 74706 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c52bdb04467d283576c6798a6ff8c5237624824a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3456904f498bc621dc78f57b814759f4e98fbadb344f64ebf9032346443506 +size 30098 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aad44c8507bb7e3242f96b50b836bfb2f5804c81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e44d38aa24a05305da63d377bb87082e9b3e1ae980f440745a8f6a0ab7a097d +size 19106 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..734c5f9c7946b7c878916d240a4684b2749612cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f66a689a00e8d3aa3c76041cafb85296b0a6843f05eb1a5a7559f2721fb0a2 +size 87986 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9804e9d4f9a9490f8bcd74c74c31891118b5c592 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c2a08a053e03655c81ff1b3ae8c1d5fbba0ad9de0ad629be417e1b2b187891 +size 50017 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a5f3f1b3720d4f821fd9a46cdf0aae61fcc3751 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce533b21f26f7fab8419fc1343cfacae5da5cc3013961fd95d13e63349b148da +size 31584 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41aa93050ff3ad974bc8d4ae90ad7b96ce41d5af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ee5968f4c62a1daba1fdc66f4c9dbdd70ed40344b941b9570696d1f414426ac +size 145407 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..221b66eb245b4978e073da3d86bbe28044f264f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a94c95d8f773e6ce77ef611ec8acf393a9ecf16f9ab1eb8d672b7a25bb9c8f +size 44548 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..896810c516eae19bc4ccbcc07bc2ee2cff8f2f15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a06913f7bf6fdd9e6fbc7821ef24cef6cdb772ff3ea749cbdb6dc30a5ad0b2a5 +size 54392 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31ff27b8c2c76869912da72edf7e1e780ba338f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cf08022cfc22cffdf6fda06cf05d28ac9df75cc455c6d4c77ab42bd0437299a +size 92808 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b42fc9b9eaa214e05f728b91e39269207db8f0f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86269f7a924328ad446083ccf50da4271706e764aa162899809f10ebf0b2a598 +size 57072 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c55cc1272ff2a9e3eab0716a81492bc11aee06fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3917acef25beeb5a68315c45ba515176fc1a3784e546fb22b283f8daccff1a31 +size 58496 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..084076cc361b1e2adac7fc5044badefcea77dd08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4736b90a3b99cc7f1f73cf8ce4e8ae62c6411b40e2d0a9edeb09b28195ad8b0b +size 42886 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e34673f871cde5fa67df0dae5b31adc0e57ba50b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d771018c23a283c52183f9d0eb466742b3ad6386ebaef495658b6f44465e9a88 +size 143896 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..046030ef406df6115d07fdf8a91708aec254198f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17c58023deafd65b4e9d419d9b3c3ad097c66601ca1a2fb466f6cd52690cf1c +size 71215 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7629ec924fd991ce219b40a9f8aa73d4e0c4468f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf912c976305ce5411fb2a23f88de4d5e752911736b17281841dfbf155a8392 +size 161966 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8aef66708164f78902b646bd976c611ae5bf8a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d46cbb175b3aa35f52b6121620c56722f90cb9eb8d744cb33a44c7ec6145319 +size 209484 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a79d869d39a2f6b477288db0155fd7608349339a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a224f1ae3927fa962626c5990a3d757e1410ad4a7b9429186142b62adc597bc1 +size 49925 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..208f81f5806d00507771eb2f08b4bdeee64d1c24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a7921bba790bce7be1932629709acfa71bdec76ba8647736d93cc5e58b576a +size 31530 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cec6fb36a3afc577f45ef1cf2f57837f27c5a14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6766b5f9360d447514e819cf9f1ae73a3eac93f4a41c5b15d177ff9c92cdb3a8 +size 36079 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71cafcb68f811d36c5a5e26c16d408ef022fb63d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b61d5a454d87ae08d470700d3d3855df8d40f4d4bdb62c9d759f3278bc9141d +size 29463 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a83cd2df0d92f8a4749825f52956e493656e1275 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05f3f388a4aab3708cdaccf128e7223a30d85ecb3e9535d7533517c2084cef0e +size 40322 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53fbee534897d7efcb3828d937cb98e575cc1f47 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5727a6367343be69db35bb63da5bb029149294e5027e17944b7383213ae349c +size 27062 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd57411138b37885a6afc1572820b3fc067fb3b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c241a46836691dff419a9bce601bb96d7eb168f011316def0af02822c48222 +size 21691 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdfc53fc6e9a7e3e40e0510693bf20ad380e05df --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c3e49b02e3b30287d66f9e1483b53884fdd9fa5e577db3c8ab3569ef87f3b4 +size 57376 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..101e6ee6300a302b014032998825654a671fd1fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f890049fbab55e5007646390164f12d62f26313994ee9dfe98f7ef4dd5b934 +size 22285 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c169074043e200a9c1c70fc4b0fe35e1ed9161f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f937dd5baa086e04b5c138d764be2201d805e188099c539b9c0f8872b9c954dd +size 169120 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc081a5f7c8ea2ae4afe18f1828926fa069473f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e60f4e5128ffc59c2fe50a75b18ec04c9835d79f6a1d7dd75db53c0dc8bf7afb +size 90319 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66af5b45f83389bbb880ce680e98d8ca2bc4ffac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66c4ecc1cc988092be904b06c9ba4a3625aa8256f60d9f96b0b5a9c850d92266 +size 154007 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2029ab759db42b26dece686a33bd2179a1dd59ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0babb876164924b6a0d6cf571f04fcaea40ece075bf2c6d4b699da302589a192 +size 79424 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7a83bc05a10f79ddcebcef282c2f7a541eebb11 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6206bce1c8b008eca02d4370843b38d0f9439c188461e926c2b7fc782e5b427 +size 74338 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56c5c4edfb409d0f47962cf18cc11dff64461013 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b5921374b84cc57020435ccf9faccfdb644ceb99961e33300d483909567551 +size 82535 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5139590e6e768edb7a45024381fb616e908f5987 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442450f95520f10e3bba67ca29bc7364e92f14881bd8132b290b30d2279fc577 +size 89928 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7c272498377a88c9a413f4ae1bedf38e580cf2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4617c9d8a0e440198db60c8ab8f811e24140a6fa1daf98859d797d7cf8265127 +size 1005205 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15f888204ca9b32c81025b189d348e78e9300206 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3646c9c1d6f62ba2d7dc3f133af130c331a1b856c4c3de881624c6c84f957d +size 133057 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d6de60fba2cb4b6c78684dd558566089714e76f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d91a29583f7f23dd2afd2d303c612f5d007b9239c7e3c175a8f4010b39026dc +size 183131 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71a04eaf8abf5eddaa163a0d23a9c6623a429581 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03fb3e33f46688e8ac9b4d113e2808bbd75ac7957004028a2b6cfa9ff18a0c2 +size 27522 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4150b90db86862b4cf319190283916ca34dae38d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec45a25925e2e200aabcf0399fa474d27b50ad9cc45b45d63546bac82a9c4805 +size 120386 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8245f8ecd9d90d0947c67ec8adcdf13b75d27489 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baeff8ecea89319733101c2874e98c669b2a4026453f8c446b7a73bb2bc39093 +size 58001 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6263fa3cec7b7db8ca67d1eaa0f59f7d40d95e64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827902f608afe670a59d4c14afdc2b1e99a5895497bb3d43716cc924e71603d0 +size 25393 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c15a00589346c10aa665f5d43858e15905750332 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:620c8fdefedd23e9f3fec94dbb19de475bc9eab5d2f197c3a84173e0a575c5fb +size 39452 diff --git a/eval-results/mmlu/0/ckpt_294/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_294/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8dad785972e4b63e830ad9aaaf5ab3a5bb24cc22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68e2f7c99b445dd003f79eaaa31b696b04a366025ad6fd24eed8d37483481d0 +size 32945 diff --git a/eval-results/mmlu/0/ckpt_294/results.json.tar.gz b/eval-results/mmlu/0/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f625949ee42ad12b0d0810b80a89647922059a91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e322c819ea649724a359a35b255f2ee3e725e80aa7d6242df4aa5edba92142 +size 7623 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0776d396fa285d362245c17e757345b63e5454c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797daeedb683859805ce3772f574f15dc0d73979c71e21d01f474ebf6b772f38 +size 17067 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf25dde81c3fd3fff4e18036acce68c5ce104e97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24db2dc377b83d468ee89dd95ab930089115c98f8303fc088ff8869b8de8d5b4 +size 29843 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..771d5966d668894b0efb948dc49efd7389679a84 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c380acc7a54d3af17db5c2eb3ea74fb164ada480519adeaa542cbd89e861649 +size 39880 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1295f89b7a7c76c5a0de40c9b239186c36e4d2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb4302ee3af425dca63a7600f8d6c10ff40be18ce6e57949e92d7a498250c1 +size 26779 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecd56af6e3ba830bd63384db42f0763e450cf1c9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a453f46258ff55101e8f16eeb7f1bd340be3a71cea76aa2dbd6916b06b1a399c +size 61250 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00ac4d2fd29343d5705ed934e94f4784636e462b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f2ab491ed386351ab3bcc888d8ef5cade3efcc3f69b858c5425be134c736604 +size 40331 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44ae4c68f087c53a3f2a37da65dd516782ec37fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:989d3466b0dfffeea318c9dcbad171685f7327fcd047a8f6c7d31405109c89a5 +size 23820 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acfe00687ad66ea70537017fe0227ada7dc97fca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae0203ff5360e8249c62b762bb51623c29d5bb45e3238cb2204dddd0e2527a7a +size 31107 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17afd67610e719ee1e1c623ce384d07aa56f71e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7daf241da14fe26110eaeccd6e21b5db8f8e159c0d26a8c436dcc1f470be344 +size 22932 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1c7dcf8120a4fd815aa87eed3d028d825969071 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc5017310d6b50b30bf0ecb2243ed91ac35c711c60f0f70e3e84ac730fcaf3a +size 60862 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03b0d4ad801117c3d1997d36959747f55f5a6d20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d086d61833a00153fb449b5ac6f8804ad65011467cce965bb625700c395353 +size 25734 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e982920d7a0796f9ca76e0887cd2ef40e293a293 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba13ed5021838db3c8ea26bfd8d04e33339812be39d1979d0c84a091c2b32eb6 +size 25770 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74527a191e3c292d4d484678ca14b2c486994789 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61b7bc08c9e6659d71e312a67b3a3b77670b3e2dc34e0ed7ff16676c42a6b33 +size 46484 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ef15e990c50a8415fe683646135113b4d34a484 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f1793772faa3e40146cfd52f032e7549531785e9fb278ab546bf1f6f54857b8 +size 31516 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..420977f969ef38db7d758768480c6117da0ed01d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c8217fc48004270af5a120cf4f6ebe5e2e800bd95f953ed44b031d7828d93e +size 28807 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f00c242e46b66be43393072b29ea5d5d5c5a512f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6389174f5311f22dffe9f158fb0f0830c5d28a4f4d9aa7bdb04255e3b4cfb2c7 +size 74697 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..156f956920804f597dbbca5a74ad9fef30c759a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff48c44dad9c5ae5e2c991f8a313096d4e7c4f7deab39c8cba428ef4bdade22 +size 30109 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..236fe3d12b45b3c5645f6f4534432e19fc4a829e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f27d4bb8939406016b5c9299fa4fad94eb0c611176e20570bc32653a95b3bff6 +size 19147 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec0d905918024ad62fc8e618a6dda940ff960636 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8028d9f3f68cd3fc59c52efbff7937271a61d033046908bca3f13727f9a9b7f +size 87911 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2eb9b7ed26efc1f8eac590017de9bb7f4728a20e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c59b65c9f00cda1d295be6ddd2154370d2e0a79a78cbcc7260c32e8d79c07a90 +size 50027 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9058ee6fd5fa31495338e38700863d34eb38e6ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1446ab17738df8c434d0c18a185bddd4099505a62354ff29aaa9ee2a42051e8a +size 31614 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93d6e5c58c1b0ffddc5f128b16a5317bc5bd97b2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:684a3c80ff0977e60b44e6bf307c0a8c4ea3ebcdd6993e4d811298d32bcb87bb +size 145348 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ce4c53c002c8397a9d9c8ee9aa953ce8d4d5552 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0a2c44ea21586bf21515c00c7224a9d480305fccbe5327a3d17d28c771930e0 +size 44537 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c654cea55a33a05e4c73739601a3811d2dc9db8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8526b89f9e6e2313248d8681e6fffc1bed23162ba25a12234b511e9e8424be3f +size 54318 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a9e9e70710c9171ca50617d71867ba1202e24cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5fe7867dcb2a0d51ba0ef8f472290a631f8db07e810f3a199917150b80605e6 +size 92756 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b6d49a294525dface8cd4820630cab97caca87f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21ee5a546d46e92a1ff3e5cbe7b9339142f8c29001cae6b2a61ec6bdbc2126a +size 57071 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d80a185e5da505717b95cfa0d803b7f9b761cb8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c1620d06890a138e916edf13e8af9319d59532e36e5908c171841c5a831f702 +size 58443 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51d5e26851d557e148549a6d7d70a798407b5f68 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a71a7313d701c65ba16da81b2daa02a7304b55e8bcb9e7b8b6e6c63a576b967 +size 42906 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b4e0315329eb00b745eacc596afcc528d6702f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee12407aae2d8d9a6a630424b908f103c8e1478601921974930245ae33fe131 +size 143903 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7a36496134adc3944dfb22c9effeb07874d5773 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00621f6bce03e4c190bd40e967ec8646059c6c5bfb795591b46425862efbc59f +size 71207 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b448f1e012391a06a5f9f5e1bfd41b5d9f6e9052 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29f9c617bda94a81bafcee07389c8635c3da50fc11c79a79e5380f32cf48820 +size 161976 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..916bb10c52ddac8dd417ad7403da9fd362f852be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea44e99e0220e39b1e3e8edb672e277ab18bcc24c616ba793bf4f8fccb76f8ec +size 209490 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6929ded7c5d4f438c48d2cc1b9906c1ed3d296de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5524c0c87403b5f37c2e943a024a07c47c8dc5dfcb16a4b048a07ac7d7965436 +size 49884 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d5dfefe43116a1c962223d4d2353a7722060da0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2ec7d1a533d0952f6e45ca835b784500d7844e0834b596880787b7a5a4dd69 +size 31513 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c14470a19cce6a05663f011fc9f56dcfc01eeba6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2670e0ff9658b8772d972d70dba5869b466941ab6aeb024dca88b4127ceea857 +size 36090 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..558628fc96400de483f65c8fb9edee695f76e579 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc55aafc787a5d51f68c87d798faa2d0b9e1a5b88524e7eaed32c8e3d1d851a +size 29506 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18d962cbb92bbd0f5a503fb6906e5b5ee32e3ed3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7551f45b74138d5dedb8bfb7aaca78db742ec386623098b0aad4f3c5d3662676 +size 40246 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a3f8b4dab5ca71222b1e9505fec89209e95ef5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0f310752a40fc4915bdb186406f275259db1cf63cf302b167d6005e7513ede9 +size 27041 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff0d70d9c11cb0ddca39dccd8d5ca76b3bb05e15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:391bddb28a2045c3c54b9eb2c15fd1741ad29ce644304c91dbbc42e6a0efdd55 +size 21684 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b8a52afda216bc36d8b48a1547d73f6b9f0e646 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd7b2c4d7bea2f62004afa1e77c1a8793efbd2d01d5145caeb93342f46a5ceb +size 57413 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04f780b7f23ed3c05ebc7add64fc29b9387d5482 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b248bada4b26248f5766590566f43da7b7846af69c15fdea3d030dcd75a9bec2 +size 22280 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52232ad5bdd5e814e57a6bf3da2cf8e09b64b117 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eca7d0d64fa71b2caa9c48b76bde30195467794d5127df70a8e06dc73043c1d +size 169128 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fbc909f353d030bf4c0f50e6842bb3c3bac97d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66add3f745701ef7bb4f4b7a8be5998f2abfd361faa4bffa71f89fc3a18712ff +size 90259 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ffd0e8fdccd5cfd6903b74e1f83542f7fd46ac3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ae3dc611a2d1f8b634e539f71f356b982c05a46a858b7b3fa7af4455a99be1 +size 154289 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31df472c3f9412682f5747a2023c60136bf33af2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bd55e4cadf4b84452457f8e24cde8c20267ee36708ef1f7824dc758e9f06fc4 +size 79409 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79efa969108c8c82d3040cccdcaf627acfbec26a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5342c01cfc3254e85112c0d6c8706b0a9d4761f6acae0b4386cf4558b30c88 +size 74365 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a0003fe7962dcdf31637330f6da6c5959a5ef9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd37f1913d8c6bba62c06023f25524987349221d4c646bbaeea43bb9a6bcbfe +size 82518 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..202a6da973cdacf5dea1b20579089f83194f796e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:530bf9c54049cea565cc99f7b9abc1fedfbe5ec4adbead26f5cd337f4389a7c1 +size 89892 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30e7e0dc304ce1bc035d18cac4b4e1e1b64304dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06aad47a77432a81343529957acda312de41c89caab8858adc5b222959e9f69 +size 1005039 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7fa76de56e0d0ae93c88b0fb9dab5a06b10da8c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ad11fdd24a18534509b5b4fb1ce2884ba0e8e5ee9622dcda6ca8bf27f485f3 +size 133068 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e86dc6d28720aaa53c9e089594944b1525441866 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c20c8b1421c4d33914522d55445e8f806207d03b56cddbee99ca37e00e261bd2 +size 183063 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c071c18fce9e14303146371bb7e2a8783e0e16e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7233eea2c572b828de53fcc6f3ba50b55f3e0ce7a9e5c7709f1a1c68e5f95991 +size 27542 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..75e43592bbf3899e31411ca199761329b2bf5fe3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39df49097043d7c2a24989e45fde81059e35a1e6132bbad8b5df25b2b9ed0304 +size 120410 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a14206250799209e53d9900f9bb1e53bbd5bd78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44eced386eae113f836f8ae0844a85be015a39b10423ec6358b610907550cbfe +size 57985 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca394c9d8edd655215beccce5be92d6205073397 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:285bafbce7052812eaa8e89a078f28f8b5e6845d1d66cfed32a7fd1f6176dc30 +size 25436 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..768dd78071008913d673b20f0ecbc25bad802260 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f0c82ff4f4810531bc4a2eeadce94ac7b2b5c1131a7e61fde6a3086cf91f3d +size 39440 diff --git a/eval-results/mmlu/0/ckpt_297/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_297/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ae3ebdcc3451c90fd76250a30be963d92ca1400 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c330bb82dc9ac96a867669abc01d402d2bfd5f5ada07790f3802415c4f7e98 +size 32889 diff --git a/eval-results/mmlu/0/ckpt_297/results.json.tar.gz b/eval-results/mmlu/0/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed9a6e004690a7816287f4e2753d97e73468ccde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cce78137f4c481ae41740b35fbfd671731ac4e91bc1f56e14fae6aab9826263 +size 7603 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d589a09c1f4c77ee451e969e063b8a03040b04c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47371206f93c5e2c5073942a639cef956bdc5d75dedc18359d8ec7d154cf8a3 +size 17094 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2bc011e5e605994c2c46a482650469198ffef92a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b1553b5783511a3484bb608b75b43557ee9658819a1a486daa37606721ec6c4 +size 29819 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..076ffe60a0020375ab66266532785c9852b8f681 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89592e9391a6465cdf7ac5bc4d812176999d310a7c1ce67f9e99d9a2205e5935 +size 39853 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c631b9a100ffa1f2f7a42f0450000e8fda7666e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:964c13e258afb357090920bca079c50eedcfc8d814d8d38d45677dda3d8de58f +size 26751 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..541dc11c26679cb71af0481ab9c1a5eb8f22e69d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0bdbaf6574f2129a7ba4f8e8ecba6d6d06fab8f89926f035980dddb82d37dc3 +size 61252 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..57382ed482dd06df8b121582a073cf5e0b52d7e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305b56e24f8d596bd3e6394a6910cdaf7ab8d9960ed6b29cce026f9ca83ad02a +size 40372 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9ce4ae6b34eb019fd4f8e7cdd3c6dc34f48c3de --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdaaca8cce01bfbeec4caed73ea8fd68148a565d9c90d66851f32e710f4afe5f +size 23827 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fad9a621bc5146181fd0b227ee1dd08ddca52bd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3649ebed495243144ad6fd3773d34711bf280935c9c8e72a90618a514440c983 +size 31132 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bd2bf7e8780f3a393a22fcd989cbab2c4c686aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec3367a04cc56b79135268e2d84f5e739ec93fbaa4b0fd64ee263d35bfca753 +size 22909 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e60597cc9bd91d0168396f1d83a235f6cd0ccb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42deeda1802df3a72804fe441a0859a98dc28cb28b0f4aa14e3f39af817b2272 +size 60910 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0455190f9657d14ef131b3ddebcc4a402864bc28 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f304e87ed6d16f8b5f2c6b313f5f809d26c4f45cc7e83bbd215865c6ed05b91 +size 25751 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..170f7045676a2538c722f3896aff875e0b31ec7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e255f5373fda3623d57ccac50082c02f7dbdc85072f0979016072056ee46ca40 +size 25778 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49bec07e8b2caad362a8b6486c998fad419b4173 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7aed894febd2914dfd34d064ea81e4b568f0c532d2bd51a225c00a1c032e82 +size 46500 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34b9d3a751600e7de3593ae295d850148948afb4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b162ac965660bbed014c982fa4433d2580b3674b9cc8a8587ab78b693cb1a8d +size 31503 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13c61c0ffa9f3eb157d7a81688b9020ffd47c823 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a813b1b972dbdb494c920d8266ccc8acb38a50cff5303df0a2c0c690388136 +size 28786 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82985a0f287199bff93e6083a65418715585e869 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17966c0ba94cda8be26d38d60f49a4b54c3e6e7b2fdbed418c625d9a1e3b7d70 +size 74707 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46ad28908e76503607abbbcdfac07403172830e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b6c0922695380cd0fc4df39109f6785fccf14cb0c96898694efcc5625552b71 +size 30148 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f4187eb6bb83b958c9fd29e442e0c2a8c429afa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7026808d3fca8320c8ce55a8d66283ac5748c9e7481dfa7290c78234c300ab1 +size 19105 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e628ef10be29d432fd4e7eb2260359b915f38e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96113bee4110d33371bc18c9c9166ad55ff38935dcf885c3a75dff8ce2aaaa64 +size 87974 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cd851817d30382df40abbe6cd808af485a73923 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d60e875f6b4f847375cf2fceda712c5490eae4ea5a4eb6904df1e3240fa2713 +size 50053 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a3cc1e2d336a09fcf92056c2cd5bdfd37673f92 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22adfa8b4946b04b0e6713d7f746563a4f1866720146a7a5a3af8ea17c53bc89 +size 31616 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20864593d1ee02d0dbe5f685a64a429e29904c39 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b4425d6983ab900b8a9587a1a6f5092936f83381b7bfa7f75726bf2f63bf002 +size 145385 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7b34dbc91c68b3ee9c0288542870b4be7d7d516 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668094ebeaca5796a9ab1cc1f6b8fe164ee5920a6e718a00022f217df760f5dd +size 44604 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2d742a3c8bf231b12aee9ddc0ea5f61a69856be --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa841aab14d57fd13839b4cc0b2eb6f0910d0514bf4e2586361043ead809a38 +size 54338 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e6b3943858d04eb79441e48d1705b03d283d453 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfd3e9beaaedf7588ebab825414d66644a1074b002eafa8bc867a4ae3fe06a7 +size 92748 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19d29741561990fb7097c7211bb35528c9259bf4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf87d4a983a231321a68f0520e50f6ebedf460271bd11b4589c69d66ddc57a7d +size 57123 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8eb12084ea9833caf55ce189371a1858338ff746 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef1e75e0aa45206ba67ee2ace6896a11a910efc873c4213f054b279e2aaf0ed4 +size 58510 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..246dcb0350c6f98b68e710d2e51ebd3c2b9d1c38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69be47d37b8455bac55e3e202997e702564498b18f9584f83db01d66c7e83ffd +size 42913 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7380ea6e443387637c65d825166864d308b2d175 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37708641fd932db3d714b0e712a81f85b74d123d4cb1b40cab813999b69df5b6 +size 143774 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54848d77600efd0b556f82d009cb5e0a23ecbdab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e63be6f4392f1b732542c8104b31f6f80da1f035496aad88f5b70c3b78fef31 +size 71226 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2c02dfb3362fdf104ae634c17c9d51068a839d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275ca3fa230776f7ef83a3aeb7afe9e5bd13d93be5171b7b55a91872a99f7e2f +size 161968 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5982e1bbeda67348e8c56092fa4423dcb33df1a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e147d9335b1a1ca7052550e763db47479acc0f931738b3b186809b15348bcd3 +size 209461 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b4b2267ad198f784a90fc8373d94a8196eaf82c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ad3853d10bc37ac6e6ec238b6f6132e0ce05661408b1d433df97950ed3fdae +size 49930 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..220fed3ab1909ebe63ffeaaa143cdc917beed9ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5faca3b1d80f41febcf85b67491ebd77f4ba9097ecd3ea8aa0c69d74280f30fe +size 31530 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86b449a3833d9e3a34f7dde647c7430493ddc6c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c1acb7e7031fde7fd689613564d6a6059376d8787a933f07fa938e7bf64045 +size 36074 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c8b47848de125abdf5857c0b1ba7f7660fe331c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7e203389a6d588776d8dad93911047bb7ee10b381a913b9086c1d4e4fee8255 +size 29482 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..641d9cd9a880c69b5292324a75b110fd3c06d471 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd61f285c13f69cada49ad67cc3fb6c36e80f169ab12da508dbc24b6dbc07fe +size 40307 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc8cb10354718a4c4fe938b62283a5733d27eb76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:795a90905748140b23f88eeae0495d177138c08f4074f4f154f9240e3cc6dc8b +size 27073 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..907e666aaa5b526154b82f5e53e6992c53e33051 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d05030a10b3eb5317a0173b9b201bce7fb25dd124099f6186736b21ad0dcb44 +size 21691 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3eb30f8ebd2bcee698ad1e69c3cbca09b187922 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a840a58bc72e18efeafed9454c915f946b3bd217122d9dc0aa04536f8c07fcee +size 57419 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00aa6091baaf6a56bd3f9ba0c88457d995d0797a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2180ab9a2c09eb4c653d229fd3b5a071a50806fe7013de96bcfa2f3d903fdf +size 22257 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b85451e00e23ef6536ea2a4dee269a372e3f7c6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc0a8bf8a47178f73c0c71d9fb88703aa412f9181a1c83adef7ac343fdf20f0 +size 169101 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a473d7b3128459f60fcc4270e74d7e5efe322fc9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc12a6662321ce32b99fd1330e83209fa9aa4b8d5e5978a922452dc64c5be51a +size 90193 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfef782ecb3472e9339880d39215b6877881eccc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb37ab71974474f6f382670b1f58bc610bb396d16c8b27ee791907ec4afe7a8f +size 153967 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5c1d00096c1b9009efc476346e3144474665205 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db856f901872d494049dde35c842810e6df61c0be8ac060621b1022acbdb280f +size 79423 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f45b3bdd058b2de449ba3ac8879f092106ff1494 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4074426b74962395950cfeb1b5b148ec391737290a955b18d5b2a914aa56b499 +size 74376 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32c034d9c3160583ec4a04acceeaabe6025c9ff7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd927ecf13b8cd9298e773a1a46715f461e127ee820daea346cf86c65d335cb +size 82504 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95d33b065bd3c611f0b7fdc4ef40d6403ff7731f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a133acb5dcb1a580cb83adcbf69050737396f159474b92a43bf60dbd947a28 +size 89912 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea29f4cf30fa295f1a7e964f576727679e30d089 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f75d410f580e13ede97884c1c8b3da79034e1c66228f081d32920c4409760b6 +size 1005320 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fb27b14697def5b0c168e21f151b527d547647b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0942212b322e64cd53f89cca952b5154dce6fb03980927c42fbb981abf557e02 +size 133072 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f0a8902025b91e55f0ce56cf772102da310ff78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82ab491cc2e84be516f3f79546a11a09759af04290cdb2609b3378a9c61c6ae +size 183076 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95386c808bc650802550d87ba3f952dc19451799 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37524deefd6cc03264a6590e7a9edd8f84de37d3fa03cfc6c14533e2ed1bcc9b +size 27544 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd26dbb037c619e8833c78765d8fcce2514bbdee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e51a0759e68b0e56d89bd2ae2197e640facdf41b93604b376b02a6833054ac +size 120407 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a05429d48c4ef83bea13e47dc94539e5e387ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee47f59a91f91a2765811b40157d9d2163edf17c870798678ae891d0ecd4cf7a +size 57996 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..295c3a0ca40901aec94989a240a89ce50ef18eff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3132e4e36f98b9a430fcf65a48a491e1c015fec754b5e4cf4f59f087a3a2ec96 +size 25371 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61b7ac6b77197edd70b24894ae82c85bcccb4db9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb7983c552e72e481cfb244174879d23799c905ba444e578ea658b49c5dbce2 +size 39450 diff --git a/eval-results/mmlu/0/ckpt_300/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_300/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aacf712dbfac073ca7676e0a73edecbc8a6b5317 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e415c595ff0c8686c36db1073c8774ab4121033ba57e4b753447fc1c62a2dbc7 +size 32908 diff --git a/eval-results/mmlu/0/ckpt_300/results.json.tar.gz b/eval-results/mmlu/0/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6db142b00a7d61d2635df7d6bea80b95d47375a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cacb73570fa081ff3d70babc26c36ed1e64b40ff9dc8d782add673b4f087875 +size 7643 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b05c3420e979a0112fee298eb658a6ba4bb840f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c41d00cd7fc6e6badc6a6ac1dae1d0bc610029132041fe007723eb5debc7c32 +size 17071 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea24709d8c7dc047c2e3c5be93bdb09a14b2392f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a8535b7927c82385321e81f06a765a62f8743c48b97d039d188ac52333c508a +size 29835 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31c7eab656c4a28057755ce77df291282cecdb16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b348fc5ae9d7807855419755d2b819336baabd8a70ed9ae191497ab141983634 +size 39866 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e52ee1cb4bf261a4643c2436af7782bc4703db3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d91d3b677b153195efb91e96931743d6683c67974b99ab4c3f1a60d0986932 +size 26772 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b9bed690fd1d1659c0a854af7bfba75c2940f7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46a45c01db5ffbc03375005b22cf2e9c9d8d71824d259cf4100d384e2c6f70fb +size 61249 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1dec7f0478668e9e49f95ffae94782508dfa578 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27f86cb3ca47feec223dfb1c5887f91fea5cd0f45ba0c4f3fa27c71c808bbc7f +size 40362 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..127ba304ba898659b9f1a6c4b220f93d3233e9af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efcdffedaefb3644b97ca027025f45d087d98e38d411bdcc3e309139191cf2bc +size 23800 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c554586171b46c6ec151ff51b054266a19e8811 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128c4c5cfe6c06b18d7bd1f5c3494e7064422dfc12557d0aeb295e22f02696d1 +size 31113 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efa7ef28366ff5ae9749f55ecef4ca6fc1e249ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfeeee8ea80bcf0b7e83837fc929a9114652d49c2eaccc07e807db9411e27b72 +size 22923 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9fcecbceaf4150c18aea9a5b33edccb0ba7bd18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa3baafa15f560824017a2e1f9f19e25b96c752bcd9092b5496873f35466b14b +size 60934 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25a0f487760a1e6b5351de218a628f1f475bc4b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92ab20e110864b7e05b7ed6fd5b64e5e93ac65d260773873db4343e71820a07 +size 25733 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c6a3b4fd5429efbeff4f2b4528f1e9de59a8002 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e8c683820b9e51d1f8a23c3d3decb3742756d75a434312f2216780c12b65a9 +size 25748 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d00c03345b28c3dbee766357eea580e8e29b4ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb49563e038ab853162ab5f842699b58089658a0b58b07788753df702e422aa +size 46537 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8416a2dbd8e71e7775f6ed5a764fafa30719375c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe890a2ae8679ef53ea4e9bc94dad6f3c0d7a7b15b67f5f9ef3627b31d79168 +size 31493 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86f7cdb28fdd9e1814cc70437ed44b8e136fb70f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6862657bb0ecf8aebc5614fbf5fcd748ad1e8740d52aaad9071685c1aec33a72 +size 28824 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8ec1fc358fb35327a6f082c0bf3a49f20a81da4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:006fd0c608f90bd5cd56c692e9e33ce9b7ed18a0ed636952c953af8a362d6abe +size 74689 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c7b082d0dd4f28434d792e7ce21fd13e110d840 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f41c11c5fedeacca20596c90e32728fff49e3620c0b9bd83df925cec68b7ca5 +size 30102 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0557b614025f4172959c87ab5a0e5ce72ba1417 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f05add59811ac143d05226110c4fa7da3617d0314d309e33306c8928df42876 +size 19082 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34c1ee5f3ce98459cadeb945f98482506a5f51f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32fcbed7d1569189fdd0161b62d9de411858d10df00618e85d194bdf0624d225 +size 87956 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65b6377a9164fa18d51e61b623608a39d4021097 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d9d0dfa78552dacbd30a15500ce97992c9d1c9465d246fcec8e5140b6e3245f +size 49968 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91cc00aa940b7c283420fe4798848e0da7d2e72b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6191f22d36d80d6c24ee4411f9f7e1f43853d706d1a55ed0cb45d0a8da361611 +size 31651 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfc1638324a06d17c9d31a33e31c14fe0877be9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2d46b65ae5e6329522da525edbf3cdb693251f70f57b4eac76feb540646924 +size 145368 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46094bde7726c8633dee0a8326d87b8e9d4ad525 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07385b856f245abc324cb6d85859f7e5679a0f1a2de0af7b168df8244098a924 +size 44545 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea530c9ddecab69b316609561af72e017f48267c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37dea28149cedf653b6624c51efa82a6f54ff1a5607aa796e3e4421e25c4b958 +size 54334 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd34c040eec549bc237b572ec9f7256589d62926 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f949c47eb8daec6eca3a0edcc711263e740ae42d2e5e93761793c64c6cd07f +size 92753 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a083b9c17d262b92bbad50558a83e731c301f26 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c211f8af2068f33a284eb5b2e33ad9ef76596d54122b38d0ed0eae150016773 +size 57057 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0717d7a673ab6526e32297b92e4b4593e411f84a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54b9b6e72696075221ffef475fe95780ac346a5adf9dd8826a7818c16bbc792 +size 58510 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9df8b894d8777749861980f7298d7a86245f6522 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e090a13a22a39adfa5146996ab6491b5ea882b41d648d0595b7befd2472f48fe +size 42867 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2541e0c029658cf25de376fa7b523f5fe99f81c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9cbbf533dc9102d6af158afa196e8d3819318658b7587290424da8303bc647 +size 143828 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ad135cb105b582147d7e25beba50f4f4c541c90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:593da2a40f0f716282e01f61e603d9001c70940b1ff1d6fc2f07576cd44221f0 +size 71183 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43c5545b646d7219e03dba3cb6ff0d69208a40c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a231cbc93772b7de2dba5cacd3a1f13aa67860a41f02cb4802db3b04abfa99 +size 161882 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb17dd02229f1787e4077aa9f60274b747dc002a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fc4f0d15e28f75b62bd525cedf628b96e0b1e901d17889b6523752dc975a54a +size 209412 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8598ee0a96dbc6db123ecd4c096530497170e8e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf394298be508e5568c1412d668ef820d5c48e584ae2e73617844f021640ee1 +size 49957 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0654e4f68baef2db71193b61c2dba56351703c3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acde4d911a02e6f661f336d8be87c4d300216e4f8de0d76ad6a63e842b3cd31f +size 31533 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd9cf2b44ef480c116eef07c986b213aee962b44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ed8e78ba53127c1294a0f1a9e8259b6073665ec56e27e89bda91d75ce4b851d +size 36083 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c35c16c7db375b8fe537a3b58569a4c25d4d9210 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83016f72d28e52ee602c84ee534aa7d7c810a09bb9282ef49d2f18125d763d8 +size 29462 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff22a13d9dd38d277ea0f3f4c461b4399c0a03db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a88ca9a84bb5f5034356537504792114d4318cd863f95972aa2fe6665f32997f +size 40326 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9264df09654d533e812d93db2228509e631e677 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fec9a61823d89a718ec00b7e22df5f38a54bcd5fe63d0429e08a56931ea58e6 +size 27048 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a3395ddd8c902246eb0295d05fa4a8755bb180 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d592ec04ea5bb90be4c4f898a327744d443bcd039fceb4adfc7ce930ee60df11 +size 21691 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c043d40f35d661ccc966001f146a794f91977a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9664c8ac356ea236e5e701d102437d470f5e001bf8cfc6f112737451e2ba8720 +size 57366 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6fca00d3365fe8091c86bec060b83dc01fc80a96 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e22990ba294428de46ec70a7c9548b9d234b0b26ea55a66f96c1f1ffb04cd695 +size 22263 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bf3479b15244aca0569e8491a4e2910f74f3e59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb6d8e7e90fb250e4c90d217437371b9f512864e66f684a4d6a311cdcddd157 +size 169115 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74cab63f0d6fba3b0ee249836ac37c80b32513d9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3110e9512dc084d8d7168dc78df77a8edc5489341c9ba8a7ab348d338b6c571 +size 90181 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52e40b202325212a55478e6ea65d60a169072ab0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e34e8aad40eb290adcc93caf85992d7a18f28db623c7829f47a99a92123df777 +size 154070 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8da9e1b842e04bed0d6583164ff5c391134b32f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6d5f594adbfef53d7e54dc4f59a4c16129a4238448661bc0665fb7054abdb21 +size 79481 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1718df0fc7c20f2e789021a3b06bce083eadbf7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92a926a9acd6bcb4f2df2f6a323dcd24fe5969d80ebf35bbf006fdc43b922115 +size 74382 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6b484ddeed5090d0b20a76b378d413af983e0ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746ffa59089631bb55dbd72495127c14d517d24cd1d0b74f7b76c8723a1161cc +size 82541 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1bcab07e615c3179f0ed3deb9e8a4e0a509a7e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10a3f519768a69407113c832170c6c0dd5979bb62b33a3d4e55b715880299bf +size 89940 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4215473dabba6b2013c25b1eebfbff343c2fa5a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc7685189d7659758be46a2794d2a87fdc91ac924ac94b43de6b7d0a4d405ca1 +size 1004988 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a32a5086c3c6ff0c906491c40fcce3dcf899bf30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db4c8126a228022296598327203e7adeb948f563d7693293f3216cf22b69b5a +size 133089 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1995c61fd4049c88616184be4816cb3ef9d1670 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e38791166de50e04b6b1ddd35520389547f433efb56c61cdcb6a441f726d0c +size 183033 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..186781c00f6f9687e5c75720aa490c63bf43ff3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36ae71ab391859ce8c268153a59c98a258ee0ad586bf6d4dc1b7d0553ce7040 +size 27522 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..156a5e5e9ed64e3659636a37df4514f0ec8c35f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba7d64783717dbaa78921e35549d30a95b9beb4244af0dda3f6e3db4f8ce114 +size 120442 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f088042478017954974f62dc585a24273673169 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede7e6ae20d4c9244ca4635a29bd4346bf56c3de0f04c856a05cbae083a09f10 +size 58008 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63fbc57938a91855856ecf9cfb68ac8e7dbe797e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edbd142aab412af1c3deab250f4e8451d3d34ad65dacc9a273bce8e6e9b2eee8 +size 25440 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..699dc53de0ec7a585de6edfb146f3f88af93db6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cfe66179b7cd40cd061d679053f8c8db8556d7a7154ed1048677c789475356f +size 39465 diff --git a/eval-results/mmlu/0/ckpt_303/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_303/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8754dc0724cb4b95bea39bf6e54a937abcb49bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b0e15235a84659854dfdf380fa3bdba19d2842cc353e6baf39fda9caed00d9 +size 32942 diff --git a/eval-results/mmlu/0/ckpt_303/results.json.tar.gz b/eval-results/mmlu/0/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e7087d3a8459ca9955a7606bcdd0c19d67ed335 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4597c1239182a39fd318d8e9d437e757045b3a0f018e5f5e91184dcfaf79c22 +size 7607 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cd6a8f257d7b47972daca4b86fe9176bdc4d260 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e40867b2ffb414c26092cb6e60acfe3c411c93c8c8c44609860f4bce2449683 +size 17040 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cac0a8349c69c6260bc82f1ed0733b7f2bfd1e54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db1dc8ff7ac27fe7e5f5534e04ba19a6e9390df3fa821e9029d085b4db0b926 +size 29826 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f111e646c75f1244cc147635d74881116a3619f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946b474eb6367f83e01b8aec3d6a4c03130c89793930c7d955dbf94c80451644 +size 39820 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d14acafe4db125a712634cfd39a694c9bffa490 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4085514feaf321335b45b42f6f7c483b3b397be7609205254edac0a4db7255 +size 26764 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..97579c5ba23425355cdc6e4995fc810d301738ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492378ef7f6cfe9fc502028974075f010f3221bb6f2d1a04cb6c9aa7b2d867b6 +size 61210 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d4a5d51e33a6a6a93f509a4bd56da38fc91cb7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d931db223b509938f13045502227e3450978d793acf18ac42a064bd473de19 +size 40402 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d808722e6ae806d78bb7e99a341a925f551d3aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b113df11954a8db49c9d77924124102381052bb0d9a33549123bb2df05ab0d1c +size 23821 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a32daa450bc60f64bfc69aa25e05d1c2cf1de7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14aa61e50ed49ac4307fcd6d914f5920d88bd541d1f2cf43bad87c483c549d15 +size 31109 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..810a8481899acbe33d2bfe66e626757ca9792893 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c9170bbf5dcec6a5f981710950b33e80d80821279085da4233da3fee2d47c3 +size 22921 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1eae1df6dc748d10033f472c99fb52ea13416441 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:419f6060963415cf749e4c24fab852f56ce62ee5e99fb092002e610fd29882ed +size 60933 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9e3afd5fd47a6406c37329c69e58f17f616a46d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f597ef3c07848ee5ba052d63a0d7979de31471b92d7817184669e16211fb8df4 +size 25727 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..958d62968cf3d7dac15a7f8efedabc8eb0050714 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bdcb4e3c51f9445313a541cf3c589863b8868fc6bdfd5c9be4d37f6fd95b384 +size 25748 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..170d659c46fa39ecd07fb61f905ee88cb9c01b30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc72cc09c8981212b4fdca7728f404ecc63b44bcbd65576f68ea8d4aca63bafe +size 46514 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c030fc19e323a74625d4f982e58295c109ff8699 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199ba24079e1b5ebf5270796ba22c6f43b4739b3013234e027f151b16d925b56 +size 31500 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2797f557fef99c8fa21851457f4bcb0d0a2ba61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28dcbead69c866f43c19ac1afca8d7d792950266ead68c9b5d1405ddfd075190 +size 28788 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24c7986a86e1189f84ce60fa0691f06be8f0ccf4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecfdfa3d35e78cc1c1c1955469ce0a17a0d61250ea9f536c7a0c4408704ad070 +size 74635 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4fdcd7f740d01c17cf0c0f9121ac24371ab6a48 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b6d27229f493bd1274f59c912b17bceb27a91ff4076b659e54ec6ca0738013 +size 30112 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e92ac712c921aa6611011265fd20f3c0feb0587a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06952b5aa0a0233e77e8f1fa15083d09ebf805577edafa37ca5e14fc9f3d1bd2 +size 19115 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb143b34bc57ca1b8de65e598ab78a83a50470c6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4f4f32a2bc393085346a81bb1ba5cb985c75fc899fee2ed412cb1916413505 +size 87919 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e98ea250316ce7383d86f0b6c18c0c604f39562f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4915b8c9961b32bc70f92b800ce2cde63b8d3882aba75313817fa12c7312d3 +size 50030 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec6c7a66b9a7b88824e0faf7b5aa4d542560aaa6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e3f1ff1729df93bd21d699abc4d62ddd33d82206849dda1eda3cf92d0dfaf2 +size 31607 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44c45ee3b164f5f3747d8f8a11df695a6f15f354 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d3652e4840cac0369635de15fb3b41df800e03c32ce31d51b50e4220629789d +size 145413 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13232d59afb0790c53a97853afdbf5d0c36d56b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154787d742c7e048cb58f7010e5c6c0dec6ce06e214e7f5354bed4f11882029c +size 44558 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..994aedfc61422d7ea5d02d12a77b074064e0e7c8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d086cdd66a0884508371cdcf8b4e8f18873ac9f01f771e9c86cb5f63a9f3f9 +size 54346 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..031e189cf4308c7044ae06ece58eab2c3886e08c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa6bd40d4dd1f8865e93f2b1cabd119ad4afcb61e3b8e9503f4088fecd4c8eb +size 92790 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ea2555d488955eb4b99559d42e34f46de562a73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74c1c3b87ec60ff6d9bb500cbaad308d3ed6f727b3344bdd4513cdfdd4cd61b +size 57083 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f342fbf9983018ab3ae7d0a9ec71e7110da28eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb40cf122c53cfb4544ade32b142789474d6d74141f253c22c2e64dd62210388 +size 58504 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4da1d513b322fbe03f00fc0500bd5043320a38d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c323ed9e0777b69a28354a3bbca96f7ddcaabdf38fd01d5e1358e63164ea513d +size 42863 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81f0089b23a4b1002bd2266fadab83d5fe04f113 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808b6924c95099a17321af5bc213957e8eeaeb76808b4da45de3bf452e42ca20 +size 143768 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b0f6d19c930bef9991fc49e9c250fb8c346ffbb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3433a65c98d0c304149f4879167520beec42fd999679de9c7a91ff902256be7b +size 71192 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c048184fff96ed27b8e0d7ff098282c4c7ed01c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0e4656d44254ad1ff09d04053d014951880ef1b1951e28fb3b5ec55917a6583 +size 162002 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b86b8eb84eea6beddfbc33376d37c47273235d65 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccb75a1892905990927672d4ed85d229eb83c86227fa8ef8b653200565b88ce +size 209510 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d96ded541fdfc4a76744fcfe5aaab6296059452 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104059da1e6096728fb2732e5b573865b6940f81f0702aea02319b651e47ff5c +size 49876 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d60d0bfe54287d879b32e1ccc179599435e83122 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5993c105630f853bc46f20d2837346be67e3d98e437c5b6ccf672c3578652dd6 +size 31519 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..276c13fc93c230aa572abe2272639cf020e84a9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03adae1965b28d6762a0c0242c362852e261e594d7a8434ef891541fb45783cc +size 36100 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0abd61c94757c4df56f6feec5d1e54642c2307dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2273154155cd3eebc89c5195159b89ac29bfa912384c1962d7c89bde947e6f07 +size 29471 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d99ed2d71be440c9aa7e46e5150c97e3651ae4e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed008aae693136d4d8a615f62f8106289dcb9a44006135554125328a99698f26 +size 40294 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..634e2af913684667499bdde0bf770ade7f451d2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af5224f1beeff42a7df640bb7bc7c7d80844814db798ee589ac56f6658bc188 +size 27026 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2663b748e0c11167db6609da0616d2500ca77f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7de0aa9f45ab372120bcf7a97afdaa3141ff9ae6466e2bf1c222acfd9cfabd8 +size 21679 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d76304c46ca329191ec02b43e335a23297454dca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9ff0af6da91482bd5d31e831b605a711a1ddb5db0edaf6af4d4d57048934e4 +size 57427 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2ffb53da9cac124e470382b17cf171b0760eabf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b88aac05acfab4bfbd40bec7531be27a006fa523941d9cd1fcba19dd6ffb1e53 +size 22257 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2cbdddf1caca77a7061109c32ed56d4ead0209e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db3359d1471dccdc5bb39b48f7148bf6c11ddaab5d494efd1041f39b5aa931d6 +size 169109 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..314a6fe445cbf9ec03fc11f4b62853429fd9ee4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3f06cf7152f39872f2ec8777abbb049d8408ae79b75b9d9629352da79e52d8 +size 90206 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62724d904bbade2ef537fd654a8419ca223cbc42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3e7bd2bc0cf1584aea3049bdba017be1e0cf0b0c33f3f8a7739b4f95f360d4 +size 153929 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff59b3adf174b81d03052ca425b18e6a9a1e7e55 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ee03852af96f593e33372978f8be58420b20a16fa16efc926dcc22aa8577ae +size 79456 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b041f69ceff3ecde967e1c4dbf6f5e5f81444f61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27160ec183a8c773d6d60a7dfecb4d90fa6537d4442bfa2cd797de0ae6f4e639 +size 74305 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df6ac0e6f258a7e7a01dbbeaeef1c0347fa5c7ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dffc369e90a4eed012d2b314657cbb59f3ac99cbad68af5b94847fe06f2644d +size 82509 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab6a6a7e8ab146d14275337728a5b71a0a8b2eaf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5336c66e6fb1126d09528395b228d7f95b6167be2d691151fa677d168380d15 +size 89864 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61f9c4bfdda5c1715173c25c6350902cbed1ad4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2edfd138f89c9aa280b3717df48ec036312037b1b6ef3695e215ff0a6d5e1aa5 +size 1004894 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f6aabbdaea1a3155cbff409d1a86a570b7fee6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af51e921607a1adedb78dd16cf1ba097fdee05effb8ed1a234fc0e09139c8536 +size 133010 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..017bafb5d65a70e460adb80eee2f24e53942340a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2ed5f664333d425b68380fa87efa55d3cc666b9b41b39d1ed43f572cc1ea16 +size 183038 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..432ba3bb85260899cee3206bc1ab54fb8da0dd63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7af9108333b6884de9f39eebf48cb7149e5f6e6f0fa30c2f58aa3103458edc +size 27512 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66ef6943e08b37b8b747cb00e41d80e9771ba6c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136a0cd6d1cd7e501721eb5569e119c2a516ff4a06ff853ecff49edd978db1b2 +size 120470 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c88d3ae07a9e094c4c49b9ccd4066f801c7b1d10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3d5e6d89bdb63d2e2f8a7e8ebe698504911dcb4d00b72ea26ab46cab03eea0 +size 58002 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6af5dea477e9c4b5dc5dade2b0772f2cb27cd9a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e0514c734d4dfe30b2bc934463d6356c878cbe5fdf5dbe83399071389ab165 +size 25458 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d8480401d4d8b88f9ac57f72c81efed32a4e3fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb599c5afd0eb04d27d940d4ab2674698b5db2c585e8e58850b9804bd8ba368 +size 39448 diff --git a/eval-results/mmlu/0/ckpt_306/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_306/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ef296990e1c88beedf7615ecf494a1ff341ccd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cefb495ead1dbb760d1f32b9da92036b631d9cfbf8a3882b652e6e2533b2a1b +size 32908 diff --git a/eval-results/mmlu/0/ckpt_306/results.json.tar.gz b/eval-results/mmlu/0/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7670bd6b79dcdc96408f9e96ed0c16ad74c6d226 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f989f645ca7b7cec3305eb70e207998f2c59762027efb2e19db43a6ed11118c1 +size 7591 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c79d0b97350c1f652c9a4eb995c1b26425d532bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b6a2c0be6fc29992453b6af04abd50cc3bb803c3bd0f11eb33d0046f0ddb1e +size 17103 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3dc6af070216334b65c59e22ce603def4e9b3442 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c15663a424043ba5bed79c6ad7403fb56ce42a3fac89ba1d551e10ea5ae8eca +size 29794 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..970aeb769208bb662257ad6a4684699a7bb6ce7b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15aef21f71876b7e0d928f0a6675355392b8c8607bbbae841ff9b4e929a7122a +size 39831 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aefefead2921046ac71ab2bcf29711bd6d4377fb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a5416fdb34fb5c822121892959fbf9fd62c63a542d602fbb470390bd3162e5 +size 26791 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..241a76facc65d2f162bcddf60ee8fe863ff82c04 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c563483b4d278a5b5fc409671215e985056d2736bcad55c98103f79f2f5250c +size 61224 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f349f973f13c8d385558d461fd86a8882beb6ced --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca1c2be97251318836f91ed21754f30ae3aa3bdeb8b3d772d106fe33f3f44d9d +size 40395 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e29f975cb0f74babc1f13b9eea34dffc7fab7858 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2565f8e50e338fd6654113dc483708786345f4aa3d63dbf254b28a252cfd025b +size 23806 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b9879252034da7a5de60a36fe8850d9f1c86ab9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ff83696fb6b66a20f9a1c1ca24d293d2101d0c36bea34e34bb33835a61110cf +size 31167 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99991a73e20690d9c843a2b7dd413f2cca409afe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6421e0ede510a2bf36cba69bc573b4ebea6f9cbe56d67a38d0d55d0bfaaab808 +size 22948 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79e9c9e0b76fb1500cdb7bf91f216cc5632e4967 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70df16f6949ff3ffe6cda8d8e1b247e90164dfe1489d670144e47512ae4c06f6 +size 60935 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3006b886aaf4b7a76ea95ce1e142c8042f3a4cc6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8dc90f02a837392f8e899128f6c3fce9045ae9cb814e74e38dd764306c8a9c5 +size 25772 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5de3400ce717922f09ff2a5af39d836e306e023 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5803dfb6da8895f64b293f11b32f4bb4dd5d8fecea52361d3ae502f9db3c8e +size 25763 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51e31727fb8f70e048c36c58dc6c53e03ba11b3e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a58db066bb94829e5f1e620e00f4378ac83fc5c6a15a8758270149b5aae8e10 +size 46489 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dad2a54529427590483181abd3ed91e5de318f99 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b4638fb303da685922f963805fdc034c40304173a84d0e619b69be0a559895 +size 31520 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a4f132aa401b62c1b1036ffdcb06d554ff6dc4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb66833c443fc6d2947eb5492b13cb25a902f22e9a19241514fbdcf4510d07b +size 28846 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e341402e8945c60f47cc8f221c538b1210717804 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb5369f79367d60d9f49b4f5382ca8ff2c927cdb1f1002526e1e3f85d2166cc +size 74725 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..acf85ecd827a598a4dd1e0c3a8ec42329ef9f610 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbafadc46a556bd1a96ddad2f512972f8648465580b9f30d129af4b2ab828130 +size 30155 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e3b54049a28168d7253d5cb4666a73099e9ff40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a805f5c0a2aac62e11f41c584c8c3146be1498221ff89c894484052f8ebd9b94 +size 19132 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ea07f8596aba9adbc4ea2660b2dfb311d8f455c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e98a07f7e0af5a5f512f13c5b9abfd37f7eab6c84bb8a69a2e660afbfb308b6 +size 87949 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cee0c80ee3ccfb3919d554748745f3104ee6a10c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff88cbaead8dc5fa0861b189f173f4eaacf7d130bc0e9e19090f2e9e20263906 +size 50034 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bea99963301ef5cc8b44ec82133a12072d26bee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f38a916782b364e67222fd570bbaaa779a0f76d9ee10f69fc056e6616472adc8 +size 31627 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8393cccc6db393a4f3646f75ea4888acb4e699eb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ca7cc1d299e1f17fef48c6561eb99d9eb297cfb05c328f01dfc22a23c42944 +size 145432 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8023963bf381d63d09a9e6ab1983a6b70e57a30a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8b79c17e58ee806f2bf93b8404c6ef3b24b50c0dabf47748334323f9aa6c48 +size 44539 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62e9ed225131d6593baf3a55ca15045dd55f8d2f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ef4509be66721563817925b63c5370da572766190b41b2c0f8c038ee0a1f3b4 +size 54340 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1c27d9ae63997e1e0e4a7965760277b2fa619c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:981745f8d3019edfd68d2cc5660134b674e982cc013c85e43057195b0e859c92 +size 92760 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f94e295c35b388f46c9bfc5035d70ded4388beba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3e8f72d262e7e8a2c92308a7d9c44f808f0956d5d2b14fef7701ba1e4d9d09 +size 57186 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d89ba977aa3c61c62328200120b025fea303026d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6b5b7454ecddc944a01fe57527270db26e54f7a50536bdcded9a888c62567b +size 58477 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e126d02fa9e0926eb725cd183fc004154bdf15db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db8842a02d0513a82f9412ab856fdc8a948d9ce7d8157e8a7eac47a7d3d21088 +size 42938 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a8012be92a1420c0b96f878749a0d2823c964a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9be620584256c4b3d0e40d627ebe95a6c78a5aaa6bcfff930aa7c5262f10ee +size 143761 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..08c59879e437cb9c3f5c1b9dba3f9645ae0e6595 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3656498690ded2f5546484677c3a3d24c331b81dc699b3813c85a3e5c030b2a3 +size 71226 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f1732e4d1eacb4e5b43046e2d0ed63a4d4a3d67 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3d85fb5ef3d9d50b9af3e1490faaa62b558c017f9223bda1ade060b1550485 +size 161997 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4e925b82c8cea8e5e436768001da2e83e277f47 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87248567689b2c4fefb9d126c67b98c2de225da16462bd488837669e21cc18af +size 209503 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98a70044c25f8e065d55d5d89b8928dd86699e5a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e72f594841fd8ad4bde2a1e40cfe0be2be9e0780e98baf71bbf552c63a23afa0 +size 49925 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f32620409375a5fd4ba244d4aea5dc5597fe541 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a860e6b8cde048cbf7bd074b0843a9d27d1cb3803f8ce32ac08f6b24bf9b1a3d +size 31546 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d8508b0b0fa88076ff2e95287788ec0701ecff4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d7a04d52c3c60d0dc55073120e5e1677f5c490d7e5b00e063462a6d6e8019f +size 36080 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5d6b8ea92af0f81b0672a5ddfc560495377785a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a056ab7e48d056ce0018d2e1216961c4f664e3d5e90943e2ef2b50f439338c +size 29480 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1842760310a3f7e51d2cc950d7610ec61afad5d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ceffa27c45133219996d514b0dacbdd8b22dfbcd5b8a6539f365b14760f533 +size 40379 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03aa283b25855c2cd6a571f84dca716741aa72bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf1de117dff2547c9c65e59408b484f2dc451538f4c3672e1a52a4758db3469c +size 27030 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae05408143eebdd96921b5567330e3995d6ce052 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec30ca87b83f080578ccc00b138da47854863c21f23b931521546c570a0ab209 +size 21685 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..023c208edbb895cde2b0353fdb76cf391298e6fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba261708d19ebf868928c0e68aabb3d69a026f2ef81b2cc7a8856fd5b3702b6 +size 57380 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9bbfca10a453d262eb2365dd8f11e08c890dfc8d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:914841fc20036b73ce41e756636805122edbd2e19cd35f840213798fee6933e4 +size 22285 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1237e9575506b12900c21fb44e8efd55f3d026a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0ebeb48c7b6596a2f1267eb05aa3a9808613dd9e50eeaa020080563980c4eb +size 169111 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e638fdd08ddfc0095c753a5c95927ef5ebee9c5e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71139bea71eba4b98c4df7221a7ab42244b4684e72719e2e6b2db7aaea1711d +size 90272 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2afaff0124a2461f14b75f11e44ef956364ea184 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dee5fa0f9d2462e55bb5a85ade963d073d363c6c9be568a184d9b1952ff5d75 +size 153927 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f05dacd4a2b822e50056887855058112e8d0c22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:938087b802baa6029ad8eb61a9cd452659f97240d4cf26de18eb67c057a2e7d1 +size 79415 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38de713c9b099d39c4cd7f72baaee58f17445f1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e3bf66d0dd2921fe07240f6db66ab08c4c85feed436f6f73ee9476cad62b5b2 +size 74361 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..753e99ed9c16e5c7b3dffd0f56a20e558befb8da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6754f08712e240ffa6b8bba4204361ffa6340800a0eb955cbf823027e12466 +size 82547 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ca8f0b0c4b1895fa9db862aa3ef323b1e6d4d92 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d992dea811f723290b72d3c5b250c930dae68cd0bdc9579a3e57296c1ee69eb +size 89974 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc314cea46e2146629cfbb2c11e534fca3b23893 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e619fcef2c402f18eef155cf747e7c42b065358881ebe210d34e24dc505a3448 +size 1005541 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e3919de11dcc4cd8f1a52b30c5473bce5407115 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8d23123804ac57959f1d6e970746da5972f297a273a6eecb0d727d9109ba59 +size 133078 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfc94dfd84e24156ed35396b03ebf6235d7f5b88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8487f9f9c059a6ec18bebcc909f7abeb87455f756c895a4b94cb435b709db0c3 +size 183114 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f649d806556cf656c4f818162302323c1fa7e4ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b6ebf6c53ea177c33dcdbada2a2db7f1ea2951dbe1a27e03603d6ed7ebe4dd +size 27518 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b47525f37c67e8c78d2f623a1d97e7992a70bfc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d694744d24a9a21de585075ea3fbb0f246a1cfb6bb833fd04fac0ce461e16a +size 120452 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14a77e581b876d602de6ef7009471d0b253fbcb2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c443a0ddd65e8f1818f865b799ac94dd4083521e81a3b5509ca4cdecb3f4fcb5 +size 57966 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3dfdef3f4ade4b619dd9e130957231d3a03306c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abde28940b7d744bc6fd0be2cb168b01d664d0a29b1bc888651a1102603cd1eb +size 25412 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3783c34965f4d3c5f10e9482a97c50979aa9367 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce91e5c1a9dae000e24f38772b8ee87a4731634efe2ec296331dbed610fed488 +size 39416 diff --git a/eval-results/mmlu/0/ckpt_309/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_309/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c265903cc096b38a353f8908a63b9e24d9cc7215 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71b07676e2d5c997b8d6320da003b009b431d9956ba201b6a3f51a56a2c8272 +size 32918 diff --git a/eval-results/mmlu/0/ckpt_309/results.json.tar.gz b/eval-results/mmlu/0/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99820af18601f3267a4d4e4b6bbc0835bd7aae72 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab373190e26ca8e47395b2f05cbf9c6057804edf9bdd91bcf9342649dd67c1bf +size 7578 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b8abbed16802cd602cc728ad0a7660022995c6c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93fcdca16f6f7e93823952ab8b49761048d7b72b5815ebef783cd96322ee4e20 +size 17071 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..152844e70d7b1dcac050b56e6d289f63e3977093 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cff816af5f6022a3003e09f1e8d65e02527e988fb484ab54256515c79fab44b +size 29802 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12bbae955f20a594570a626f9984e47bf8a15aa2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d442a89aeb5e094f0184f445753be9977abbacf4fa04a0539b0c4bee78434f60 +size 39848 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3d08398f1ac6e4db8905546dce3e71c72d12068 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfecf0e19f7feb47d4a2db6c0b21d5ff44d107b1ec8f0a598b65ad57bb6bc3c3 +size 26759 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b87a5fd1918eaf57f97fbd61af7ea5de2d9247b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d556f21860bd66f4774c28ae59cb426924309e187a01c65b14fa58c519ff14fb +size 61218 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..464d7c20a129dea1bd09d1fb6fd2d5921b0d3bb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4164eb0c78684a3c1ba9ddd87ba1f4cf2e13db3248fd3fa111f3a84ba6acf1b8 +size 40403 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e77833d13a31b122fcbae6afa06d92c22cf226a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6836ba9ec096d0ec45ee96fa63eb966092e43bfc89edad7b04d37fa5618f09 +size 23816 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..228d80abe30c67bc14ac59601dd4790dd4895861 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff9d8f6e4246f70350bba6abf9570e2edaac46f710d3fcae25a298bb509fb72 +size 31129 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c87c157b7a4ef6d6387f2963830b3807d3e5c6f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c78a561894aeefaf824677f1aa8355aca77a812d2df7e9665e11a0865e4d111 +size 22921 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..754bcf667be8cb207dc637e6d0e3d5b17ce35473 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4006a176c478673f83eb24b6f03a977bca031cbae57b04e620820456bd7c6e50 +size 60891 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..120b2ee589a935312c3a8c35c2f42b1e341f827d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edab0977ac832c160ed119713db4c46863b3db094f6ca6a7b1c6c8518dd0947b +size 25744 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..585621dc354f4c30a8823519225342a8c76037a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf6b38436a1091de4e2783cbeac4d3cae11d2552527c4db0da3078e84a114d0f +size 25768 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2745fc404884a581499543d70e686d17a676dce2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a2f056d9ef5a3b068676bfa9e8fb738d0273311a3dfa9940321354163d7e1e +size 46549 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65055e56c18697368db364d493b36e69e9a439c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4108a472d2b6cd1211ce9f44d1a2fce3e19aeea20bfdae5aeb3d7bd452c1efa5 +size 31489 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..976e37e99a33ad3d2b7c532b242091b61ad09235 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325e8a1c137f119fef9f071fa38065d9e59ac722db36d2536924ced4f5556e27 +size 28797 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b7d434639c8f8a253b65bab059497141bb4d16d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb229197e2b79b748798be2b30a424234d2e628cd387de88882e7b312a3b8903 +size 74673 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3b1842f33caec77e03e5a87ec8fb8a8d1cd5a90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4798cec9ac1028343871f42d66f64f37aa77ccbe0f40c9a9385dd9b6ec38b49 +size 30147 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1a3f6b52f164757ee293b6208b17a137b9eb063 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c3a5a3265250481c2b9c31697e2fbe67a0706fc18c3ff6682365a07d43dfdd +size 19124 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc310dac52be5906ceb3a16cd2e380c7c270d23a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff80555da9367abbd774e7e2965d97d8b3d4bad398556608de1d9704d9b95d8 +size 87985 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd6e22de4e2d04ce4b6a1416fc83fce4c01956c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0f5318f39ac83ad14c3966cac742eaad195fcbcac5d9051f797ec5eb870ac8 +size 50021 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e10da0a862e0fabfaf21a5409572e5da754a5efc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2e4667f9be27dc73149a36940b34b3022717a4f58ee58a6af7e848b490dbaf +size 31608 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53a77724a0c94f0c51a6a78cd6fe70a343e50b2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d64fd8e9b9117472a624b6af06ed595cbec58656b12cb5e2e2e92051f4a73b4 +size 145475 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8f6c66e98dec22bcf0017293db43fc265d064f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8e6b30703d408b6219745297d37b99861232491e106ed63fb354f670837d47 +size 44550 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b48ae646497ff5843ca0ce26d641e9674eb2081c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44501ca5026c4b0d210067923cdf758e5fb71c8b0d8480091f4a672ac12254cd +size 54367 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e21074b242c35c266929a727f176e198f37cb43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc2e98b54474a250ba02dc6e7e25c4a686b188149a18a3fe994041147009432 +size 92762 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72aafdedcaf04ee92b8c68e3d3e47f0bf21bae18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f8360f55d96a2c2ec7554e04bcd873ee04b086049da32e8e5a8833b56a208c +size 57144 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05fa7cfe70d761d6b9aa4ad4233bee17067e5d42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c72dad708d05cf4974624a5a80c1c7e82c6610b6f988514f1e6c34c4380729b +size 58445 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b3e5099f854a5205efd7f17bee2cbc4a342a018 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c440c9c473061cc72eab0964f2e461ab598fb0b524767678ee2650933298d56 +size 42875 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a1037a7a61ce558aaba441e07773386b26034ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61886695cb90e8c6b178b8078b005c3f3f13a079afeea7d06ee5550fe7e15df2 +size 143777 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62b66a37df1756d3bb9e71a3d06b7c6e4b187e06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ccc542a6e3ebb92484bf68b5fb360542532d30b6a13f70b062d3bf749adacc +size 71223 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55418e4439fa9cc480a0f512b97e5ffe4d0b2abf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80356a71a794399197413c03360c03a09ac70b453b37885d5a5951a0a0ffe5fc +size 161911 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..499bdf05dbb627285e23be93e270b7720034028b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad1b15620fc2207bed079cd3ca1a74eb05fd6751adb992a09a860ce251994995 +size 209472 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1948810a444e8f61dc50a427fdd2cfc08f39c739 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46194a50fbb172339c69da58422f8dd5d1f2734df7b2688436f470ad5d7fae91 +size 49898 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d75c893c1f131cf18202c241f748a03d39843a74 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f933ef91e820b9b89a59eb233d7754aece92f8a71c8c6829c158bd7d8e249885 +size 31549 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3f23e2aab70a30c28474399a7474b4fb68b5511 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342340c991cd577ab1c62792e1b730c0ed994df7e3fd5d1699c57184fb6b2755 +size 36072 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61d46f65f5133f5ff874e7a53f3533faaa2da201 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aec315248eadd1a2d6fe70d05e89617dc16d21cd03163dd041a9730b6c0489a +size 29482 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3dafd2760f49dc4c7c5d13437fbe0084359601b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf4c43b9da0d97798bec54f676eb4f8dae22c078d58b9cc1cffefacc6e5cf44 +size 40327 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da582fd4e38682139bfa48f53a4130e6ccba7b73 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c6dd72840a71a22fb5af351e5bedba64f628935e63f36d44a99e194e4bec71 +size 27055 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea4146e9d86551e20efd2451189b58589e1cdd6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867ec0a51c88aa14739cc89efe2066464bd65c5eb1610728d81abeaaa14ca7ee +size 21698 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b63ebf0ecfce42628929a5b5380d2adfce28fd22 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e22c7f345ac1bdf8e114d3c865978b88aced28237e310c66613787c284acf013 +size 57340 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16e7e4c9f2cec6a09c193c57d3b4cb4a37237c35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71e8682f9f501f88dc5958a8c416a6e7d7669156239d94dd2cf13f9b8cba42a +size 22280 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d59860b75b6d57eb3ea30725eeadc89201f0ad07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34d72de119d570a8f177029c3ec3ceaadcf3febbacf6f4f0094d18c1295d829e +size 169059 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..946304a8e72957c77039d894c8f6b8286b274a07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aaa07113f51543c6c7d4cd203c40c0be35bbc62c72479546a5accbdda14278a +size 90232 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33d9ecbf9dfa1eeed6ca3a486fff7ec14697ca06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8a7894e17612a10070d222cc0ba7e2f4db672684d7e58b8f1082b92af5a6ae +size 154119 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9bb8575b01501b14d407ed127d2b60660f9acb3c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5baf4b4ff51c3d87a0bfbab451be4c01afb2b1c71525b08e1223766be5f4bbdb +size 79409 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..921ed8c6b9c63fbe4e16b355880100f73d2766f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba58b3c8eb5b46ba1fcc44bbe60ff38dc80ccf49f68f1c273a2b4905637cea0 +size 74267 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f87a876352b46199e21b39baa1d2819d096bea6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf3eba8a75cf57a1f2ace4acc555b4e364536025a23fde2f04287b7027e9b18 +size 82529 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..756c22c1f533861d8048f21de309b53e79ee1a66 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f852a4d60cae7bc1ff0790b64618ef6b7ccae519997f108df92584e4cb6e9b87 +size 89971 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..687c8cdd3dd0c14ca1bd21ee158470b3d25112e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f59ad3ee17c25f6582ed88460621eb2dd484bddb0eda41c55df5dbae21da01f +size 1005129 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19f38745759d77a7cfffc1c0b3449dd623891484 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5a1857d58447a274a59865548c1c96bdbab58a912d4bc582865a50c782569c1 +size 133135 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf0a48018695be2fb662938c5d0fdb53b0747d45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ffce24a866b867551b8c69589bd0531891385c989e1801ae7ca2efdd45d394 +size 182890 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38d9a5967111fca9f6eaa0b9df3052966d5a849e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d632746747035d70be53205167dc0b79ac9127da6e71b6523812bbb5abfba9 +size 27525 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e0cf4f09279756ec463ccc6b56a2b1c148f4200 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aa7c644772c1fcfcb4390b788f5572d9a25926a3b1842c0d15a4b8ac1b25c36 +size 120455 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e9592c9db5cb10067534a2054ba975695f30f5e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f477a59a76955ac5785b41240afb971f231e51b90880dbbe104bf8712d9e5e +size 57984 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..807a4046a00f5f67736f1de41e24c8d3eb23bcd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bacdbbf8214a0aa3c3816d850bbba33a1b3cbaa9bd4b8d1e8b3d0edfba7f695 +size 25389 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..749da53c52b252fa176b21474c108064d68a9a67 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7ece47a8a7bbae9ec60b4cfcba8b801d701cb9849fd971c9595deba7f453df +size 39468 diff --git a/eval-results/mmlu/0/ckpt_312/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_312/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31c3417637f5ca46a5585f11f9cc4165085748db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e9e13a379ce0e92dcca3b39faf17f3d61269a4ba7fb6c7154e6eec84779ee8 +size 32944 diff --git a/eval-results/mmlu/0/ckpt_312/results.json.tar.gz b/eval-results/mmlu/0/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b5c499cd97d61afd67cf979e780893180f782ed --- /dev/null +++ b/eval-results/mmlu/0/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f599d6b3e692bd2ae0be58880bd25f292133918992867cd33f8f67c1f096393 +size 7607 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47c398ad551515ba445ccd2fa852438ad1350e41 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7bfe427e638a633766449b368935bb841a9b6561e10bbd0f369fc95c47dc443 +size 17076 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bee5b4064d651f47886f3c6d869673bf9f1f862 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fff99114b760e7071421744d5476719af15220882ad58841ed1b9702e7baeca +size 29840 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8acc143aff8b85d6563171f93afed7437461b677 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c5c8c41ebb6dc2b1e956cd19dd8e9d00d95a52a60c95cf7c67eefa9034cb78 +size 39869 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e84367abf3bc609fcf105c3fe8a2f6ff312d83a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c755709b31412f3d5a4910ebb9d33feb46305efc1650ab1be67b5b5556b65b9 +size 26777 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0a9e734ddff6240bad256289e09a6be1aa045005 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab58269ea6535897543b38cf9b17d3c6ab6098f7f5f57b300b4fdbe1fb568fa +size 61232 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f6080649ad5287e8bc5d9fb12f6a69eba0a6f9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcf239992e65e4e40270fbb235aa46ac68ac2435686ccafb112e19e96eb15ab0 +size 40392 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..282c9f0b6e9770b47e56dbb7ed1a5ef8343aedd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9679348492dda31320f11547ba5f1d9d481dc983833fc1a9fc3ff80af5298b5 +size 23805 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2e9a3ac893de63b0af73eb1d0d41ddb3eeccbfb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:868d113728f80cb20fbb01ebf5cdfd8e16e94574cacd87c865e0e7ca1a94f9fe +size 31143 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30a832092b0cd0507728c4e8cf95e270c8c9b170 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1460fbddad971921f022c23864c1e32e85d507a08508df5a02ce2cdd6884b73a +size 22974 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de80de95f3a0348aaa658a8a4a7a4bd68009e8ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:974283a21f7ccdd38b4fb48d1662553bdf0d3a9a44c81865c40d498ab347f9fe +size 60953 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2bdf29e612549974ff67f18eee7306114ff59096 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11d5977707436b1e4ad0a59c25e66a1a95371ad0d17eb303f2237361230dff6 +size 25760 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1af13283d01181b4dc735cfb6c95f3d2271f2fde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73bbecc768aeafc12adb81a222560be759bdc4a11608be646392d6e4df794bba +size 25773 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38e164dff133b430f9d4ce4f5d1c14e2879178f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c162278d0b739611070ce2b07b6702a50e9d2b84a7dc0d32010bf9b21d8eb696 +size 46502 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66d5fe8ca9bb8abde7e246a2df823b3aed0ce67e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ca92706891f3f30274ea4934d9fc913f15486af4df2e4be7527b97390ba2b9 +size 31508 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..535333175a85988352fe856dcea4484e3ad19a49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e90fc2088d8bdcd8bc6157ddf446ed394d2b128e2804e18e794659f0c5ed75 +size 28858 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a94be8209177c1ce17678acfd6881e48bf0cc28 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db28f07fdc855e58751a9b3f68e2a496cde5e963638fa9e08560c320efe2b53c +size 74797 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..215418bee5c59d7fe894b0e5533f639c4eecfbb3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1ab906427718caa7cfc29e873da48f4f630aa8aa12b4481c5e342dc8ac8e06 +size 30160 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90ae8c3427c7daf3ab218a645f96be54e865601d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c75eee259bdadc00e23567a10a89343d68e8ea78e70bbea659975ba587ff5d75 +size 19132 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0217da6f09053bd4cfca8dbddb7df653e11a960b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a98f8d8284a7a04c7f75463b74b420672671e227151f4cae8f33878ae0a1cc +size 87964 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db4246224df69303276c4eb66a838e96bb92f0e2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b784f6c89092db859eb622e4164e48df9645ecc189d8697a4f83ad21900162d7 +size 50035 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7af63ece35f805325bdd2bc14bbe2043cd436bf7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42b13b18aed76cb68e4502929cc7c9db137bbe52d02f1afb83a0f80d7313655b +size 31629 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb4ba67e5a43c23d9bfece6f6c513315ff619ad8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3817c3ce861fb85a531ff20341b0726a69de9f101bff7a986c79659f0a20d4b8 +size 145399 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83afe2b8d5d90a6eb90bf2e804724638d0c9b402 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0ca366f0b988cbc820b215aba8cc78057ee524a3183127414c402258deca103 +size 44561 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ea95c5ea0b922c6fa2c06455f8780f131e30883 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd2ba91ffec4242b420fb633de21dafcfd643e31ad71eebe173e0d5922d2782 +size 54335 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5199e2194866ac366645fbcd49a3053ebc30415f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85fb16b57107192ba458b0bb8c73950c39c1524dca9e27f72250302c2f6fcd19 +size 92813 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f734b60496f79cd15e4d7c257bb3d65f7377cf35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d556f95102f7d3d4813c2b7fd8b99ea50a461ef08798f68404f99e92293e3d6 +size 57162 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c38d5a4a12df9b235c1af1d7af2b8cdff51f2e7e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b5dcd5e581b0484105e0768326cc74a58958bb21f541a1496ab664e2b15aa5 +size 58477 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ecfbee488f97ba5ac7e39306933e0594aac6e1e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bf9d8fa69c7122b9fe012d6cf39c2742b10b4f17c60c34015ee82db6240768c +size 42937 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b570e1bcabaab22d2547c6a0a8f83db20577ae1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2477bc2f05cf9a562cee5aec215ba48c2dcfddc1503a634f1d90571aa2e37a3 +size 143755 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9850bbfe7a151bd1c217da03af926b2794d37c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fa53b377575096eb419e5e8cc34b7b0da43c8c77c7d58d6867bab87bf27575 +size 71208 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1700640a3045b7997054c504dbe070408890b52a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4e4dfc31056b17cf4552af1de1c785e918164a7a5b1fd75f6af1da3ef4a715 +size 161925 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be6c9c0b1b5677c9af14b87d6ffda88c03349896 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a5ebf9f411923434892108753e025a968dbf1e0cedcdbfaf62f97f1d3b8851 +size 209538 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9da360d0db3941532bf844981811672fe3813348 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eda45af23cbb6fc75bc8679b6e68cc29a809b19d7f417970f747a3ea4f35f2d +size 49929 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4b8941fef349c265f83d39ebb3e770dbb70fc77 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbe4c6adc65555717396c2679ea206a6b3608aea0564ed2a1af060be90b58c0 +size 31530 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d2f0326c454079251d1922a467340362b31db225 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2535cb0af7c1e752cf0562c24c7218fa13f4230173e9ef5b63bc976f292d2a5a +size 36084 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4883986a3eef200f4f233596ede85c3c5d16ec91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9467efb2967c934ea9edc5e0f4af8e8b6a9b3a7f1696a4bde017c34d6661ad7 +size 29457 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b19016bfd2936abfe3d25de077913d58a7468bfa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f309fc2e8a353d2632a708ccbd812dce93989ccb0dae6ba3e6a9387b9e7b85 +size 40322 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54b357bac2fb177e2f8a88e6a49eef568a13e475 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd14ed922828859648755347664f3a3cd4811b7cec7de5d00c0671417020ed94 +size 27045 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a97855b286dd423542147474d12b654b4e759d42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaefd91b601b1ca76a6c88e09509b26a2fbe0a6f1d59f1f752735be33af9932f +size 21707 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8bf5d54c9572aaab056a094ee7bd0accf18a7ec8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4319b39f767e9c9ac5072c2f65401736698ad0036fab555211e7caed905255aa +size 57394 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a98d735672886cf3f0a458fde0e03cf29aaaacd0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7baa9702860d88c80a94290b8acaea5c017234a40d8e1e46765cbbf451ff43d8 +size 22240 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea8a401bebc07fefbd91ecce4a29ff2ed1467fb7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f205ae2e4d665e4a79cb94be9f2d7a121bacbb7991d465db98fa706e073751 +size 169062 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca96df7f43037c3ca28335017cc0abd72fba5fb5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c063b968e54ff6c20ff228d610f113b87bdf5105b26e84a65452b44da5d4bd3a +size 90306 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37ee52c2c3c86a338f2f5c8f0f63096304f4f12f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63982c80ab7cba52f9149b038c29e8d4ad235f4eeb829453bd6ab2240d17d47b +size 154379 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4f7f9f886c0c6a04f0542935fb246aa92dbef08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52477303e84963dcfb683f5393d1da35507a00133c8883e5f996b4b008652ca +size 79459 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd9a06173ff1a78653bfe6493632fdf6ac38ffd9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bbf7131944547ff8fa227c68da53b94630375b79e8c04039336aa07f387e9b8 +size 74415 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8739d6b8b5c747b56be31638932789db65eff5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba2d44a4223827492ede98e0a08987d34c2a6df2baf8027d10d758b9ae32e82c +size 82505 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd0dc2b2126ca3d5b58e6df65721d896c7ee03c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aae1640bc8d7f9238a65c422f1bc64bf58cff3d4ab128c2093723b802bd8b8d +size 89984 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a7f594d5b18eb6b981eee42000ecaf25f510f1c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31a00c2be1626ed259cd1607d0bf71d1861b382c7fb36cbabc0c503bcde0b13 +size 1005547 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25d016fd99520e6dab1db7c814e0fabb444d3a43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ac66623557861136555cbe5bf381d692e51d9045a3e71cca69af89b565951e +size 133120 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc05db4d7784d6b1da30fe5d9ed11b3f25d99704 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acb63d07f717a0f78689a1b19781167d2bbb483b81c71f2bed9601a062849059 +size 183110 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cc2160305d6f8e36ed1fb3a1339793739202a51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c7ae2f7e5dc0836115a8e0dc605610d46902d7ea6b77338e06255584ac143a +size 27517 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a95b0e62a1f67f4570af5b9de398abafe29b472 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2a2916fc5a265917d6371651fba5997c47056bb51cf5a721561b97e49fa80a +size 120490 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8265453ea14ab85e405f2648f64eabf70b6dea9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:862dba50e4611b7c8cadac59d6718065322bad8a8d5e7f3c99e16d36d2fc2442 +size 57987 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5d085936d75efce4cad5aa6b32bc73379b41efb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c885a26d4552bb5e4022e231491911796fdfc6ba80ecb4c4b4eca132a6727b3b +size 25415 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa60929c611b7d66a2cbf6e4eb62365e8fd05339 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c28e6bf17b083d013b920cff42d6f4f75750a1f90e41f7e09749329b29511cfa +size 39462 diff --git a/eval-results/mmlu/0/ckpt_315/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_315/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7179b1092e69acd8848e0f27d50aad3dbd17f10 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fad09b2ea0bc33adca00c5413add9442e1729bdc7b8b9450bc15f80d6b25f3f +size 32960 diff --git a/eval-results/mmlu/0/ckpt_315/results.json.tar.gz b/eval-results/mmlu/0/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c871bc7a71c391189967756b20b92343954b972 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:483df8ea0d5ade111d8465f16ddb15513c0a8d6a69badb8636be92e0f1fe0b60 +size 7579 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c476c018208b676ce0dcefc2894221c5513038a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebcedfc42fbb2fa1ed2ce14f8dcf5fe60431ce6689cde196657a9fe394a4f322 +size 17063 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ff00243586720c7954f8ae6c4f40f1b8b24609d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aebd2877b2bc1c83fb5effc6745f97507135aeadae21d41fe03c18bf33d158dc +size 29830 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d389c042fd537c21bb7081364a838a3c536f68c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f57fbc7257745d966da4c6a580de3a0dc322ea56e8a31b0c6250893204fcdd +size 39846 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cf439bc42b15ce04d65ac0d2d0296573f868464 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3740f0465ff2050a0aa111a4e611e627e13dfda4cb1607a20c4a538672acc94 +size 26746 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2918f17828cab1b119669eb840d83085d6a1793 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c849a0c2d80282f233e54187aef7fb9b092e580ca8fcf3954da2f6fa3ed2ef2 +size 61220 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29eadd42c699d5c2f108d3c8c44adb16a3386629 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9728089b2729cc02af6a430951770cc04dec38111388ee9a9a427047fc35ebe +size 40407 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..42a10ad56e6939a9b9fae74bc6bca4dd5550d110 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11fdcc236785f92b0c467e28a0ac1b8ed6dbf018f0c22275ac7f4cb72dbf747f +size 23808 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea719dde1c1fa0d3a7889bf9795e06cfde00ce51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd77ce8be8fe81147c62487e84fec94ab3fd3582b085d12dccb05eaa6487ffc +size 31097 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b226038b37c09f440664b4c70d421ac2dbeef5c3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba16facfcbd698644b2f79bf3ed0772dcd82a1df7f69fe17e26080f2e6921df3 +size 22943 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c125f26a8b1423ef30acce55c47bddff94750ca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f905184d1c33e98891be8d2d3ad9498adbccef2ae0128f29d2d6adef92701f1 +size 60942 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de42e280bb4d483ad4b6b4c7a3abba35af845afa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a0d6f799a66eddce18e93f84c82df4469d0059514ca41d8d1d81b0fdb86806 +size 25750 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ae6729bc4a7a70cace15b67b22905bd09d9efd7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a024327a2b32e080e65d3856e9db516cf708d4b676ae15333cc9ae4062c7afc7 +size 25791 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24010e50cc5a60fc305f2a33ae50e021a98184d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:becd6a6e6813329076b5ae307274548643b381d9c6f01b6e15ff60d6193177c8 +size 46488 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7bdf9bae8e4df5d83715863c7c3efaa5efba11f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0993a6c13e5cb79df3693df763505b62018cbff01d7dadc06f5ae270dfb269 +size 31515 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eaff81b51fdfd29e0da724e30a647978a77ce8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a00756703d3c75a569d2712951502fc424ac6b398c2245f6dcdd9a241980ab2 +size 28837 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6127c426bb1bda33ed39103fb5bb0621301d57e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5429e627877f9f6522ad9716b81daa4ca36987775b69f19899609623ce981f +size 74753 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a018d9da669240f436b7236c5c568866e72f7ed7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fc1c2ea18cf412d5fdb8539bea30460bc14f632e68947b3a4750de6b97e92a +size 30124 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a6b3f11348409a176d60e5d80d5e2cf005d7eb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ffc19249cedc7ac27aa80c84821d6293b31b1adc57f1f7dca74fc6a2fc59127 +size 19118 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d52f2cccdf7f8e0c62e59682e733049d6423d8ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b0911908c8073abc5f89c375fd62fe1d17e1a8260417edde30a163b0f96baa +size 87904 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99d3aefd392ba93bb097fe36c03d02e5a757f6fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78dbeb154d479e8c76c32e3916a7a554d0cc3c7562bef0437c3817250a5bd63b +size 49988 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afb075cb58d4785280601185ab450a6cf2482074 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74aab14d2ecfe103d1a8170f15a2f80a2b6fbc6b7da197aab49922ee9481e0b +size 31602 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c96485cc32f9cfd4fcf672df44c6dd750de9dd21 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c4c3c134ffe594e1576a0324808a5243012877dd385a235c169e43e394fcf0 +size 145438 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb734223c7752dd3c09a3aee88b5f3478a0a6300 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3844a122d0a8fdc8abe9eeb0beb797aec598eb2eca839c8a678c731ecf5a561 +size 44562 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3c6108f8c9e5aa0bd577b22c424e43d9ca14fab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8292106262b6096458683dc37d6a59357181e82863fab76643d654a782be1801 +size 54384 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c921c1c757bee053df54d1d7f711999e6b9de17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c70c191315ea602cdf749c801419fdeaf32d21321f8abe7432a8d7f5dc793d +size 92828 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebe94486f6fbc8ed5e136cef8ceb5b45132fa0ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1950293f0e5b1a8599536ecffc0e79c68c717f546b9dfeb92a47819f311e410d +size 57136 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f4bb22e1d14f82025cc059cc8214f32def7fff1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f832d619883d11281cf7d250cd48570e1362f0509e768d5645660416e58691 +size 58460 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77776d922c26e87257e931a81cbc62f4d3857006 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a12cdd3cc46e72df384a9b362af61eb80c96e660d5f97384c9cd4e4ebf8c08 +size 42925 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a18b2357d8184c8db80a57f74bb5212931a3eab1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6364a982883889f6b37efe057f61d879717b0aa3af77f8f2e5d87642f61ce28c +size 143838 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4df0ec6aa8dd12bc0115f06e558f15a270b95135 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45036862103c728dd81eba32a487deb0582ac8fe74927312b33835a641fe221 +size 71214 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65aef7a58f64ba801a177315673d9cea51f1fab3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e64c4c90e71e5d4cdb8c260750db00990c62d20d36b1e0e2226790cd8287183 +size 161928 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f9cf617eb90ff9ba217a2442d37cf862eb1aaf8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75c38ce42601d82b1ffc4baec9d57036db41e91c0e108c01ab92e8bc37b51d42 +size 209520 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1174d0341d99f5da784dcc9bfd67035e171c7209 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315dd924bad75aead765e97962481a971919c4990176ace942e581d213c53044 +size 49900 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35aa488b4249ad91edf11edf0f0039db293957f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6cbcbd026c6a1e3722a44d76bade70cf350ae96201572949f58aae97048301 +size 31530 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2efe135005c7660a5e9a47d9a7769d599b00645e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e6c8e5788fc50528720756cb8be591df0c69b705aff68b6dcef6f4658ac977 +size 36086 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..571f67b94b427b06d50752b7202d46376ef4e029 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5ed4e7baf44e46295b8fb062299ee7dd78f08506b0dfc8da3d1c6f5f28f787 +size 29468 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcd7899bf13f9063a5de0db4781ff80eb6f3f2c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5daaf8b0cf4650a7e117360b6713506a07e40c0f568932b55c3b2dc783575c3 +size 40338 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2699ef6b46cd76707dc65e686aba01d61306431 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:497f65c302bc9961ab54776185dc0f991d8629a30ee3b88c688839cf2a2079dc +size 27052 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..267c8daa45ba3a4beb3751e45c62cff2e4769747 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7cf07115ae4e5501d50930a60fe437d6dcaec850dee0df18766194e9274827d +size 21704 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6fe20c476e2a7815d65fb5289a73667558d34bc0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5d82c71253fa0684e849b22c5ca36d9c24667a26cc8d7541ed6ae1d62f5a71 +size 57397 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e3593a4408723cb2cb51232b89c7fd90c2ba909 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:991bdb1bf7e584597e3de2c417e974416368eee46969813bb58e0d4bc076384e +size 22253 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da929b52ae43740ca62f87a405942b0582e05ae8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f33e6d65299344a9e513680c6451a058b3fe285f191e3addcbd4338b7d395e5 +size 168964 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80a96326fbccc93e1ecd9b70e8b6b9ec70f3f7f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6398c00b1923824a5b09ec1b1348f395a9076979433aa8d95795dd9b179a5de7 +size 90304 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3962ffbecc06c4ea9faf8e43de9c9d9b346fa16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b300d56f5173499e2560764a4730b95b12065d01b2052787e0ef2f8682799d5 +size 154211 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c84c69c43189d7911ef89baddda7c993895069e5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46dd5c4f03f23c1ac765f8379483661329fd72c81dac7373dc9a0055a0fc9351 +size 79433 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..684258cf02f5bbb34598a517004ff6a80ee961a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6634786221c86f6c615ea100aa2683b4026b1bddfb9014805ca1dffedc5a8391 +size 74306 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..540a4e6f5212e2ac7ca38c21d54185367c4b1abd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f67f36f88ff272474b7447f0d8dd6b90cae194625a3475e1250880f71e9b4a1 +size 82536 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..988e0f787202809581c01069cec47ee3777b246b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe024d0a3cd0e6c51695d852e33538472bbb94d26efce9b429b74ecf02c91bb2 +size 89941 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fe7b8d7f0da24b2f7d4c044bc991c13bf2401e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecbb36611cf7798ad3b64ecf0a793b8f54605807fe78e92504dee7d5bca134ca +size 1005381 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b2dd6da042d003d58ff9a33e3cccc0e857f001b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8daaee65132e2247d857f7658a8c5497591e852bd88745bce86fbb4d4b7c1eb +size 133137 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0885c6e38a6a47fe3de53e744f18e0547369a8a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e594e1e9d510ab2c76d0b980e9616ded6b1b1487697a78157574b00ca14f9738 +size 183018 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7f7bacc16530665f753949ec06f7600aea04c7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a883beba23a4b3bc43d226999e2c99c11fbf9ef002828abdc40cc88e9d0967 +size 27534 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93038603a34cac7a7eac5e930d932744d334ca0e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12dd9d77cd35d841b2782a4b3ae7ba437b2dc4089629914e8ec3f5c6ed1b8797 +size 120447 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac255a1aa1242cd9b59cdd55be68aba723295b44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c13a84faf2d230cc6b4cac742fab9440e0fcc5548c52c8b72f152f660272780 +size 58029 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1daa591a10d6a49ea1a20b8033ef732a09e82b5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08bd8c0d03ba51057414629b2d36a2e93b79b58111d4e72efdb7903df1a5184f +size 25387 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..697075a9343e31c7d506044fd0cd246cbfd227a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64e8bda037aa9b70b463fac4748d23a64f6df3ab7ce9b2c65fc5ed89c65ac864 +size 39439 diff --git a/eval-results/mmlu/0/ckpt_318/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_318/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d3550420a2bbb9b5dbb3956dd2b87b598685d1f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbded14c0496abd60edb75b0d5a11c26318b7f846dc81aa1fd535c61516b9211 +size 32908 diff --git a/eval-results/mmlu/0/ckpt_318/results.json.tar.gz b/eval-results/mmlu/0/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f92b2acb99c08e38bae26acffa70d84518c3c5aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b438e63846bbdd4b17340eca0a9539dac74445932ead5101bdd1b77d776a771a +size 7598 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd473713c7174a0470ada264faf8a2c272231309 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7122acca46630ae29b203a31f15967b2f92f5df1b2457568614fdd2388ec60 +size 17079 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..443ec9e871f904ad1613641242a90266d7b445ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00727d205d444a449de99beaa546ff82fb9b289eed67e655b663016de4d2339c +size 29864 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a76d81bee939c9d505d76130396360cc17be3247 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef83084d2b1bcc3bc1afb387e3502107a449fe0d96ccb5aba099f66f88cfdb0b +size 39848 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c090e58b5440641ad17ef313fc2be9bb39ea4288 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc35a29004811ac2120204d34f080e3d694eb9cdad49ec258279a7f02ee639d9 +size 26770 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55af3ee2d779a52cb2c0d44d9b7da25913d7b8c4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceea429d1ad0ee84e003eb19fc6994a0ac36eec4dac70395194bd3f65d8367df +size 61245 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36f2bcbc968b8bbed5ecaa9340eaea398244d00d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9cf64145d4bb50d6aa2bde1705e8424fb7ccc09b3c1badd41553247ceb8f3e +size 40421 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abd329701ab8802aa976ee9586f3e2c5dcd95cd9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:543701832d60729643615d187bb08b6b8834b5b86809c89400d51c4f8bd3e2b1 +size 23820 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7be853c0a431ea3dc2bb9b481410c319cb50be4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b5b38536d61d6b30fc3f30a7aee34582a3fe170b7f98eed73ea5ce291f4b23 +size 31130 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10249482b96e42fb619765868d0ac854b5069e2b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b03947f1e9939e18171671f465e98b418f367882181370d50825e303982d40c +size 22903 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12a98f4cd87567d91690be121edb2283429b1df7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca909d70e2fac4e36d3e8db0edbb29133b344c7de28c07988a8389ec2998061 +size 60968 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..767a736bddf817b37fe53062eddad501cc1983b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5be6a57f4ca985583247fb45509bcf800a8a8d8b4bcf0d28670adf7ebac529 +size 25754 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80195ea44d1df7ff0e9bef5b5413bc03e70deb70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee8954c5a5295971cfc9370842917ae83a1c4976768661200d8b7e948b2830c8 +size 25776 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e390ec92e72fb79a6b4f8ec2eeb37e6b6552e63 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52acff18856bccb7cbb41e238696a743e5f9c10a79479d1878efb4d8b348c2f2 +size 46516 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4422ebb464f07522f23f76eabf526ad8bcd76f7c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d8b7489ec7d4cab3ec0c9504613e446bacf6da3c056fe522e834b94557e417 +size 31492 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53f815bc9e3f1b3852c38ed5f974404a77cf9ce9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44000d85cc4ae1a0c248aae318c2878ef208e51a47996922017e422b962ef509 +size 28800 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..028245e7a402c0b1f80a7c77d017c4c5114ac4a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c81a224211c3aa6b1f60f31f924f18de352deb31dc5402150f9c5d35cf4df3 +size 74787 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cad2f1ed52198b472f03b63fd18ec72005d68dd5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b868ce0573bfb1e80c34b68727aacc7caa7e34e1a4757c8ce438736366a1de0c +size 30140 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00389a911a035fe32037b46210a143b5649c2734 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51d8b49a2b6fde8d00002b9d12a6ed56515d3d4531dc7f754ea2e253c68465b +size 19113 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ba53ff93e3dd27fb2e09f7bb5b65e82627945fd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557089ce448a767dac66f9b4a60365fbc8db0cf7550ede923b63d58756a5e431 +size 87999 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b4d3e40c7ac9df81dff45c6b8779a21c1f9659a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d251939fdb010ad4ad098f610642521d7d519019a18d7a6dec9d6a860cca7cb6 +size 50050 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..325020eb4ad19a52f174ee36ea507165f5899d13 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fb49f26caf1693c3636fd077b18748a9f1cdf426f1c57889dc98fab859be87 +size 31615 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07ad4cb0eae0f81815eebe51468874d3fe6da164 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f06f255a345275b53178be9eeffbc3d6be7069acf6fccf9deebee28189e065d +size 145447 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39e88820e6bbafbeb40d9e833ec6125adc140ca7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545039503f3450b9d715b811a0eb24369f23560e43c211becf63a25327f8b7ff +size 44527 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6253ca1f24fbec71d5b7ce92c68c7a8caf2f49bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3539725e2d25a2d7949b15f5ccdd6a543d0485e3b8f9859d09f38043666679e +size 54361 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a0365a3c6be30e4019954ed0030b3d7265a5dcb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bc9c1ec6cc8297e8d54c5e809a77fe0c4f9cc78b73dae202b27443e5d4df3ae +size 92870 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c3bb338d781ba61436e107b3f805785cd3a9dba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e80874886fc8651be90b484d23a367b126d536910b23df8ed90b0c42a6f7fe5 +size 57152 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..427e86a1be2124dcfdca3a151f12c14f923f0255 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e29415cf932f665d31a7e9e0cd9203b2c904678afdbf365ea9f46dc1f55cedc +size 58507 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cdc5e939b5d9423ee36dac4f9936bc44d018dc2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:931878272f1bb2a2c5deed34f8e4d7e15aa60ae7fd7e7d4927db641f3b79ba2f +size 42892 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de45262af78a0bafb1c95182e18693e160104b69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe82d2b5ac73819d2356054bd6c43155ecfcd5628903fda8b8aed0aacbe3321 +size 143839 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ee72b236868ed634f6abb6b5e6f3655c9951c6a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f587a9350bb4adb95fb229149deab5d8338b6160b1f0823d0b8cfbc8d2d4c2b2 +size 71226 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1604e2b7834e24e34502a32529bc7acbfa3d0853 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7160482511fe1c3a322c70097c12703af1670e87fa9186cd634640c34ff7bc91 +size 161965 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a16cdaa9952b6bad92aa202592c3af797f8f1f59 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb1046388140c0ee676badc1c7b33a26210df18f6c3f88f83e4dfc87b3858fd4 +size 209386 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b087ad50008fcf5e4b68b9242d9d5873d4dcca5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31196328b79bbeacb39683e4272ff37f512184a2c496cdf48dddfda2c754cb9a +size 49847 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd670c3e46b492d22c90cde30dc10e413ab55819 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d2a934fcff6e9f080b2681c163c672e9f35f018bb8a7b6686ba540db85125b6 +size 31515 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f8cf9422f1c2ad6615d453d17d31e77094e80c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e2fd6b315319207cfd9a7176be4a20292ebf752cc186c35663d170c0d813d1e +size 36114 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6ab595270052d5f218e056862bbb5603af33e50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b697a740f285cbbe69891ffaa068980d66470260fd4be08125bbad580900ba +size 29484 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7ba4a32d0ef42f0d28ba01d9a3387d562a3f8e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5197ab2e420791ed591d52e2de2418798091352a437e52718b5841a69bbcf47f +size 40324 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..712a2ee231004c669d751b504199c4c334a93687 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c2f52cc0a1bc22a49c21bdb42a1a92b2176bc2d85a92954d805dcb40bdfafda +size 27073 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a93175136d07d0fe9be3ca659f116ef144f521b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5388cf9d75423b34a97c556b29103bb46d773eb0e080724d6e82d150b4f57c7 +size 21711 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a77b3856d7a8304b57e81da2b4cc120d3a61c205 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0848eab49a2a1712de02892a0af963a6716af3142da349bc5690e69d643453 +size 57388 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17713579a599c3a137f1a8b3da9fb24d7bc571ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65ab553dcd9ece41ae5cd80249828001d38de8eea24500ab24b5ff91b624ec8 +size 22276 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c459a351dcd307161249132221e6c040d2db496a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d775afcb3dce01acc0828af0a48727a35d658f3b44d02dbd4048d98d69fa0e3 +size 169161 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d18f13a08396a0bd0b221fa21a582050893138d7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7912274ada58d30372a7049afb566305b1f490471c0f4b28c45c450c041117be +size 90266 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d7e753bb21a0f28593da90a089d8fc0040028a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b289a27b0d27246ca3f17173a5f8623f5b1dc0765caf7f68e2a1949fc53c4fa0 +size 153988 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b8bde62f5f2c80c5b4e4fe62ff2aa40cb4690f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28fe857d9743683d18f9aa9ccab2a37d1b4e49e43a61bd41fb4c8353507bbca2 +size 79403 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37a9c6ab970eff4c03575a144d5b6ced0a778c96 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ef140868f3e67c96541484cb7b24e9ed9bd49de510f26bd94d075fcb5a5664 +size 74344 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68b411e5dd8e2b00505ab27a45027277ed91e170 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb2871d4201fdb2912fa331bba3435c3cc5207e6fa48a695883c984930c22aa +size 82515 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6f6e2414469c8db1908cb6fcd8cbe38126136e0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c1066b900bd024193dc72af05125ad6d816eda35388d9a4da53d0c1b8b52d5e +size 89931 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ada66f1c0d82f5eb53230f7f53fa63413488d112 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b624b6af3bd867c996f14a4fc7d561350711e50dc5a16d1fd415e84d812ff2b +size 1005675 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09a0601039d8d0792774d581cef362be00e43cd5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a646460dde028206c3de2ec11245a7e5be0d586ed77a7f5ef5cac9e1138d27 +size 133117 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac69791934b6cbee5b00eee1b37b768866159627 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63946145a5967ef41f5468bc13bf933fdc53fae30db75339a23e0a99cac01a0 +size 183143 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43c6e7b20b377ed0f975069cd64c47afa931255f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc3e3579a8ca154f276fdf0c34213abae240bdfc7c60e084424f643fbafae76b +size 27536 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4c564030961bc408e205fd0384fd4376bc67db2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf78cbf460db3002c29ee3733f7b8d47d1feb57c8433c4c01e0b126702f99a23 +size 120458 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d8ffdcdd169464de0c690d4c8110bdeff14eb17 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb368785ae083b34833da0202c63b3203896305fc1e613a2e1dfcce555991375 +size 57986 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e17da2301a29381d826fd91c07b23ecf731f195 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446f0440e0bfcc4c8f1e8d877c3c184c000e5b5d93e6271962060ae3cea8251c +size 25398 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a78b7bb421237cf80f52a73063a5b79aaf1bf64 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f158e0e2a3b29d22fa0519299053fd4e26d4e4509cf3279b1516af2ae6ce3a +size 39475 diff --git a/eval-results/mmlu/0/ckpt_321/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_321/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1851ca3534864e2596c45b4324bfbae274af527 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e870d04ea3bf6f043140026c5d5f74a3fe5d8dffa3d61a6d23a5159cb80162 +size 32952 diff --git a/eval-results/mmlu/0/ckpt_321/results.json.tar.gz b/eval-results/mmlu/0/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..742b944b0bd03dffabb3657c7abb5afba3513fe5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f395ab7daac76802420c15aff4dbe01edc5500dc4fffb5103c3944ceb13b135a +size 7600 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5437a412e41ae25437a906a655ee5325f45ba71 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a38c260fc54c1112ac9a33e1e771aacb5eaedb3df4288d5bc21d13e9af40c41 +size 17092 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e31386124aa65b53960681c82c20bbd0091a61a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd92b7cf8cd9f77144dd2161d2f5caa36f91a41e94236f3a6efb2d04ec1ecec +size 29852 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bad1511ed9bda09a384c1fa3b7539a0e83c2ec78 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf27a80d0ff0ecf059e759d8359a4ae2509341b5f1efb67b491b4227e9e3a77 +size 39873 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70d07de21452616375c858d25828150988f9c4b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e09de1156f53d9b8e94dee4766d6a48750d0eff704533222fa6ed7105aed3af +size 26779 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b5abc7adf552d05c171f7e1489cca59aef178af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc9796c3f26310e379583099ddc120c9bc956f4ffe45edcf130d3a7358f78efb +size 61216 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..960a4864263d3bd6cd4ecebd930ee118b1915e60 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e238ef1578d9d50e3ec2e0901840b5c624e4d63114a57daaaa7ea85df64e71 +size 40375 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e1d9a8239be9f1854cd2435b24a17c954baa0ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e0fbe7d3eed28ce42e1190e018a09972ea0baf3c8015847e6df392345452d8f +size 23858 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6f52c9d9d25cd1c4bdfdaeccd04bd9461c95368 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3144cf9d39d4fbb7cff0da1a1b021b02f98f73a0ef09bccefdb6b1cde0e52629 +size 31113 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56cfa84cfc2b4fc6787a3d62075150e78d152c8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286353e8c15ab2c49ac92ec6a22505c208a4a5956f717357fabe281257e54952 +size 22959 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b4ad7515ee83f105c9a4b3a2270f7176a7774b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71029ade5d6f0cba77db0e9acf0d174e2ca1a00f8e9c7854a2c0b6f9bb9d4b85 +size 60975 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4edd6dfabcf313e2ee0691b55550586b2c87e833 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e470e4fb37ef12e555254196141a953416139e9d1083f1edf8c1ef414176454 +size 25760 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..204adea0786ffba51c68bd8e5eca087b195c2a01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38153ebcd0610238615b2149de17d30d12b6939f9a1e3d64090a37d2b49668b5 +size 25784 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3dc64e23f296d8546a732efd4a333ee5d56a15c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f3268c5f4f94c33b1b745da31241467705c0376f84f0620739acd512f3d710 +size 46537 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be8164b028e383a368d278caa3813e04023ebe46 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:605c3b5d0838983536be182a1fb9141d0380a2915dcd0d0193c7b7bd603adf87 +size 31521 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed0b09cc560f20b2692ef01b497aa55d472951d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:933fbc6ed750e71bb78f223545d84dfc18d5248a56dabb3a0ef52c59e0562b39 +size 28837 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8211dba41d0304e59cf02f06141d10db17183140 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a703b06858de27197c50cb50115d8a01a5be24675dc7023d0b0446a6e158a98 +size 74797 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d60b0d22c69b7248d4b2c4e4ae48cfc86f451170 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c48cccb0890cebb2b7f2b4c5766f7e00dabbd9903206e49f56e4d3dd1370e6db +size 30160 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..049d94645475df3f87c6a8d234a51287dbefd09a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0babb6690d71ee01ab404392b662499ed4bb8555f8078d9a191c65c516d5535f +size 19147 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0e87d44c2a99d9c8245292176960a5c8b5a7fdb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50d70293186976c38a1a6ae332da23b2c241a16e6ea583f6606c4a577fa316e +size 87984 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63ba19ba8e3521f74552f60f286c87c7e5802fc2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64036e9f18876d4be3585add403a2797bf63a362773001db42a08838a6885c05 +size 50080 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..076b14c23e66b983c796997d39da47adf8a6bc5c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f355be5ba7b50a26651ae348f66cf2b37c105f36bab672b0fccfc056308af12 +size 31636 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..335965201af94a3bb3e2e68218797375bfdd379a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03ec046a1169690dee72909fbb88d5af5936fe593421889a9f7dfd0ba78861c +size 145417 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..901c10f91c69377574a87d9a136e5afa32bcce27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c8af724a268cce100838b479a9aeb1f5a04e0aac9e3bf0d3aebbd8707715578 +size 44564 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb6208718a23bc204c92b15510fe31302dfa44ae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4184bf21cdfacd08a9d324515ca3e7cef3387cb6c0763b60e5d0fbe00ad29e1e +size 54353 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ab82060b7a84f3b240c33c0d71c76eb2d572cc58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28fc80b4fd4885405aca723713f0fd6429dd70e3fa28e3d710901f05f79cbb2e +size 92773 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..946d0af4963ab7d08add693c57389a0c8b713f82 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b258810855c92d75ac2e7a218630fc25568a235ecdc6426fc468ba8ad2616ad4 +size 57138 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ccc6385366cdab062d34e521d8d103b4c70918b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff21902ad25132a9d79227a2192cc0ed4cec8a5926fc52d0e4e581c0dcc1536e +size 58474 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f945b18bf04ec8dfcbf373490efe281fdc601b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c9d05d1a5bf65096a2f6484b9ab73c789fbdef316dff239e21497f54968432 +size 42934 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0710d922d4c58ad7e4a1962a06ceb236885b3665 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72c60dc8bc6ef3b4f104e19e3058201562f7a9e6a8f137b782737ca8d88cea08 +size 143801 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a85e5e5d9dcd3b2ba45ce70d93d9dac8c9f22f8b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6030b2dd421b796caee85600bc16c965d2d645dd5599fe044c5b56f7e330234d +size 71262 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef94b5c4a3f5b3a17fd5807fdd74448a84db2cf6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3064b5fd064ef849621555c96ab6807b4639058296d7d64b0451d6351a45c7 +size 161990 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d42ec5fe10dc1a9b3edd25cb09900f05ba43ee7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53e65c96915886f2ba842ba3bdd6255b5717e92b5f13dbd4bbd8d559d4820b27 +size 209402 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ae3ef1b3c124f919a5f3ab77fe684fda834b223 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:717456da01b0362c07662381cee95e8d05da7c7ba0a4505b5f3dec234a9df841 +size 49902 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84039e2181e95c9081479fd1e9c437e34ae02ae5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5228fbad20c2559069a27beea40261947b765891a4aa4b007092f1be0106a2d6 +size 31516 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83875ee083ecb7477bd32d19d09e15564f4c9a70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e5ce6eed79478fa4272c62d10e65d96b9a0d5f7244bf0415cf83f6a1046f5f6 +size 36073 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e19ca7145fc890cc7e0450c5dd583f60ba859682 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0606ff6708bf1cb1e20c5741b53247b259bdef69f4cc7552651b7630876285 +size 29503 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43c0afbaa4bf1f36191e97940799285206839fe6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08f58e713d687e99fac5c8aba18e1c2dc0e202e3ab3c87a9c083d675aaddd860 +size 40329 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65beda050daf4864401034d370ae02607b124c54 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dce17cf49e3b6cbdce2fca00bda35a18ee2f25a63499cb8e41ea556bc4c65da0 +size 27080 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5a64299491acf0a2ac6f3021c6ac30b8125ddb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e127177a063ce69d3262158414fc855dd3f5789354f92455c71cfd29c157a190 +size 21704 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf531e8e512cac8f5e2bf0dbe1f3b53a630b37e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f9c3518d89fe66e986aa3dff1bd21042836b389c2e2728ecd739b290de5140 +size 57349 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50d23f2c82e9f32e53449d962dc315035a8fee4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5022adec32f36c9e5af8df91f8888795a11c986e5045e0a85969deee2c53d2d8 +size 22268 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aff1a5822dffb3976330f81e4dce9dfe3f83b800 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011b6be174d18102cc8b2f657f58884df95be2c909a67625028afcf4b32aec30 +size 169087 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..373d36fd28b48e0179289d0e8a0d35fd92a56b74 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb595a7e610ef903504ff998474e98ec361c788662ea4c934497e88f12a30ff6 +size 90238 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3df08d482c0852c3d1aec59f2aa022e12d530753 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:873300d4debe45302081727d332603f37e92660d2bb1616df2bb33ba22d44369 +size 154216 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19480ba76343d98ab2bbb118848e8e35d7d5e1ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e200263e34a5a6551baed74504e5407260cc955d9685eac1814a6abe9a3279 +size 79449 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b9db81567690070a1b775398a9d3cff3a724827 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b030aebcdc38a1a68b26157e6151240aa4fa7e18f4e6e82e9976ca5ecc3674 +size 74323 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26db22126608fa877ed861dcbb1be5d7b0ae893c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:416b8cdab45250d3f7e8d8f5d1264be95eed0f6f0300b082a268b62693d17891 +size 82533 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7da510c3eee3579ab73d5daa93032d30c0f03854 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597c4f5df847828525dc5b442fc178df68eba5b53d97b9c7743be79dcf59fd67 +size 89942 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..28c65841ce738a8cc11f7cae9b1643147439b157 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d53fbcddfc72c221aef04e95c999be1f52e2114dffea624b60918a6094e4ea0b +size 1005420 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..33de47641f90d03c059ee9ccd1903e3daefe897f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9d0d33b6c4a99eb89eaa39878e60a670877a97cafae3d9d841b388754e6adb +size 133089 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b0c91903d7bd9ec274258b024ba0cd9d52f0de7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a857b06e77102786928c16b245c7507b0cbd103a3a0ad28be0d26ed77c3936 +size 183045 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aff94570b8890df761f10c53f058f3dcdabc54f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:badff0de01a4531c80c71300960efa1f441baeb56f0316903a0b8969c3fd746c +size 27543 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c9026a0415ada3db99dc49a25d1251347983206 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76f53410985a2b4b8babf00774a854fe3d16228216650cc3838cedc08da1810 +size 120439 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09f262b3e607dae717e8acd0108d36e90db5e0ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3966dff974646985bb76e1bab620b7e0273df8a149b93e51c4ba51655929329c +size 57978 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5088ac6cf033478375338acbb4c59f7786d7243 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b8bb407ce1192080fecd8e2b8476cab47b086a10487299dad48de9a72f0e37 +size 25421 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8eabe605e2d1ff95e8c33f100b2eafaf57786fd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81ba8decc6a94dcd7f76472a86b255a2ac66d24f616cf29a2d2372d7cf1a0e8b +size 39456 diff --git a/eval-results/mmlu/0/ckpt_324/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_324/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86e6f7f51ea6dae3ab8f828bc7e8a13f0deb8214 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e68977f3611cf23be0317862a01cb9f38fdbf15d1df5079f5b8ef9f64d11617 +size 32949 diff --git a/eval-results/mmlu/0/ckpt_324/results.json.tar.gz b/eval-results/mmlu/0/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7742cf655e543c520aaa4bc6a3a3b3d0ee4b2bc1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956c87dfa486a2cf8562cdfeee35d9927963625452e449c6dacf0ab3ac33ff27 +size 7574 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e24094305bf10056d7b3f326672b3d54da6fb01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d274829fe89c4b0a8830b90666c434d23182a3a0f2ec6772a431930ced5053f2 +size 17100 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a78fcfe02fb5093589fffc71440f498f6aabb28b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eebe8041e970a07326c5b715efa8e837866c0c3eef78d96a769aa9e15dccaf7f +size 29840 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..086e2ddb3460568fea1f80de6f236999356f67a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b59816c7338c95624188cb2bb5c5573c9c83c7fde1c491504ec92e5b835b1a4c +size 39899 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..699595c43946981c42e8475570e4bd92ea7c90f3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9aa81b18f6aa58838e35de00ef0aa6c2f4352c9aa86d43e991c0a4574baac6 +size 26761 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be98eeb6ed5c64683eee0a9969794dd03ccaaeef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7ca462cb35d4b1e376c5b3d9010f15ee1502f55d0f36c1dcc0d7e41b54ad00 +size 61210 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6ddf7cf5529038807447016216d063749549147 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e365448593b68a91f23e19c1bd9916847dd1c4dbbc6ad16390469c75a3fa0ae +size 40402 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b68134895a4cb3903de47315c9113a1faef92b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87238fbe1d37b55740cf96f52bdfe2d6de829607e6e1bb162965bd3906967a52 +size 23825 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5272072dfc7888174742168dfc0452b95c4abe6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb1c199fbd303da8741cdb7f963f861829dcb067feff0d532ae69fd6be9a8f6 +size 31137 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43bb96250a82858a315936afe6fe4ac9309a6663 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acaeb30fa84cc6770f31735c32418cbe6461cb43c9adfc962c0c5e9ce9168c53 +size 22934 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e6c3a30fd6c9a2e52bf23461482fd6fe1dfb89f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801275c88b3b713b4f9a23513f337fa24413508df0f755a94e40cda19d9b349f +size 60932 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63f1ceffd89030c794ec775789f8c379046efb26 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fbbecd71f3fb221e2ee470e118e881c1a272b9e8b0ed4513ce91abbbb06bc72 +size 25760 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f67563aadff6b213168f30c3706c3e621e3fb49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da4b9690330185093cb35578a96ef76c10e2ff8cb1f1dd2004149af4b9d4896c +size 25775 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd34150044a5a54e8400e491f7b9a1f6d8150cf9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83dc4fdccf198c2cd6696cfce3444d5e551dcaf6e93313f62b679d55df96a09 +size 46573 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43b724dba9d6f483d070ebe09d4668ba6dcd8d3c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba03260695d1f39b1c3a5ab20881184eb01ca299de67db92d6b19eec8f22588 +size 31542 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68d4e77a39e17d706151a2ce062b4fd018f991ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d50a37fa62267612858ff38164622d169d8ca448400a047382bc8ea98f158e4 +size 28812 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..343ce5008be642d52df74ee1fe7707f6cc716158 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78952b7254b98f4c3e2186bf1f048bf4620ca6e2c644ba9df3a6e8de0bb7beb2 +size 74894 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c77b09eec8f95fd3e03a826159773322c57d0af4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c2f7d239c616cf29cc5e816252dfa1894901401f8ea35d302827f4c48d2b20 +size 30185 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd4e601907101c71bd161999f535831560d2069f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832228c6efa8db9feda2453f236ea78e26e1146a5353d86248a64bf0c5497d89 +size 19116 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36b03c358a3764da8e152125a7ad2efa79521514 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7389f9a7b02ccfe420f42b823280e896fbd763ce21c4e9ecc77145ff90338994 +size 87952 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b65b399e713f4f20a5b458e3715364a707dfceda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78afc5fe175c37fe9fdbaffe0e727ecac0d85e78768364c15fc78adfcf0c984 +size 50063 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b296264f467602c55faba388334626ddb8b282d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa25a78d973645e661b34530f174ad119e174eee00b9593e06e6dcbd214e86a +size 31650 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..734a863e950fcdd82139985f25d4dff372fdf81c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63785d6af1cded21a48e52ce7a84ed57270074a32540aa33401d1f185e735a0 +size 145503 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4670210db38794327585de019b974756baca0530 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a37daba830c1e510bfac916e9a1a7d011a9c7bf660491681677a39497819627 +size 44538 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e65e9d28378fcc7742e0df1235b46c029daaef81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31b37f63c09cd849c7a5dbda4e093a60febf31ba041e2144d7e03a67aeaf8fa +size 54345 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64c126e55a4ca2d684c4f7e1526a424e9406d23e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30d1f640dc028f647e59d9e08d5bc6af56bd1f5a5172f1944a8d539375abf942 +size 92787 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b3515cfc2cf3352d01d4d1aa42ee6535625cbd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d681dc7ed3ded7c2b0cf2229625629309ba413f032bc8370690d9d5cfacee8a0 +size 57201 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..024641dddcac4767e8e3faff475ce77ac3b39f56 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc183d7c5164f55f5c61174f9b9d4d916f8644962094fdac9869dd4be9449c8c +size 58502 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0add4b8cb9a8e1d152020fbc0047934ba92c6c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be1260580401b7eddeb08d46a5d1b4f1b23e9053fe602b268b1bc5b0be45562b +size 42919 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a260e80288d474ceb86c1a7c2cea3c2ff11413b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c388126f6e43f69a99df57920fd205c5a474a9f40109dcd669eddb59cc0625 +size 143770 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2748ab1bd6491e0c58fa4ce1d81bf91c9abc34f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f29203fc73b4bd473aa4831dd27c75398c1ffbfbf1b062298456097c7d30054 +size 71287 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aba09703aa63f2a24a556c5a22421db29f0c60c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3317f3dac65896056ab9e4e609d26cd0f2bc3eac42840303373789bfaf7dcc +size 161937 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9349febfa8a2aef7ac3b336c9f9d92d5e6b8ddd5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93aaa8931d530fa34db08a02159c3d344dad2cae20aaaa2280ece68e0f1af40c +size 209562 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbc4d97293729b4dda621d0c07b907bddff9410b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708b517885a6d017d0ed2fc6231d037a68a8992e6a5abc0ff4921b3137419de4 +size 49924 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93c957fdf3ae75faae4cc43f6c2fea4edf8035cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe1e87c09c4462813714068242f776a1b9f755390e97db65fd7e9f8ab2f6cca4 +size 31560 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d28b0e6654e893bf2cb7d90cf5876b90ab85a8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f89804708255fa55deb0ea0537b012033f0d19e8ff59cbcec245377e19606d +size 36092 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55d0fd08d13f02ecfe356c8cbf4a932b28d54755 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0165face02dbcb24100569d6b51e2f0d928dc6dd90dfd8c8c121d23d5137a7b0 +size 29493 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..359587a5fe1f089e71490fffe51d786e1095a0b0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad57da4d84cf56445c80550fbbf5c1d4e6e938f70cd57419cd05121d34d53bfe +size 40304 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..911804e416a8950584e6aaa0bf3d2ef9b7ad993f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1704cba5d267e66553b983c12fcfc5bb9dbd1fc6c0f5a76b090fb63b00de2920 +size 27057 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3fc73f0ed34fe7cd8281541775fd0159517b46b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae423ed5616970715d1930dd78afd0ce816b0d0d9a06e0c7e9dc9e3c19267edb +size 21703 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31c2245eff9407feb57c0304920f581fb1bd3b0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:127a124f857ea9e90694f8d4559e9a3d50a3c1f947e63c584a6aa9f86d1c04d7 +size 57425 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..974cb9fdf807c16a51c21331bd23d6d81a810e4c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a301080d8120630a4542faf8f261b7f4ca4f44953c6a0a0a044b2d6bab603c8 +size 22275 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f4f88c1badfeee82cba246ba23bddca7adebb3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5cd01f871ef38ca9e54c85737cd8eb216385d73d820fe9a9419b1542ae1a654 +size 169057 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfe037b9f999389b509e623315ac7e74dbd0116b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4861599607fce1c8641279004ba4581b375e53e25f0dfbba073225789c0faff +size 90316 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cade00dc8eda56ceeacee16e6d73346225788af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd3c7239c9ef4fe74c22679e5df33ea3ea525561078d259979a57b295714d178 +size 154160 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a9bf17f13cb2c04d0c56b9caa228555e6a62d52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18794d3fb3b73f595b52221d6244d78baba105dd9accbd71ba6e8c7608b6a62e +size 79453 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8b743454cba1f747a195207d659a81ebae450e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ba614db43f01db442789b38363481221e973c0858abbac94fcfc2d989e1003 +size 74423 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..953c5a771eef5b9ec916c7162c7c0e3eef4e4fd3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ce5c89e5bb9900b3725106af2927f8dd0fb908f7a72d7af7e4321129b9ad80 +size 82550 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2fa5658ec3b6dce03048d0b90b1af5a2cc05e51 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:349c554354d667116ff93653508c8dcb0ca58b831d7cee5b173070382c823197 +size 89988 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15632afc37ea89dcba0a9708729339b88b98b203 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b9d89d50d09e171d5b36b852242307a3b2eaf3a193d1fbbe0d2dd86b401a10 +size 1005877 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..753f1db0f32f6df6401b4e67eb1dd35152db74e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e2e327aa572528385190969c5d3eb22f68e7fdb7bdfd56978629f4f0ff5c19 +size 133136 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8e1dcfaabf461495256c269a49b6eeb935fab36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54d51af05a3e883c567cfe8d3d8e7d32c9fd3fa2181a69233b1da8f111b5c9b7 +size 183141 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f78eb4cf8b3ebc004ffe58c7d9958ba407856ef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12ec57929ad28a276a3ca7a5846a4b370d12267e904f73b56c561755e5654533 +size 27503 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4610892173595a0bd63b540a6d0627dc7a7baf3a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c09a0650db25205a382a63c80c900e60784e85450481310a490c21871b968990 +size 120447 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4089b6c73532cbab9bd811d8ebbf642a2a6fdd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75cb8952be79b413b63b9699b36c2c4a8de6d99aecb754786868984e0107d476 +size 57983 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd8c62b278c2866c2d426ee1b2958f09ccd04e6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce007f2a429b0af0b45ef99ac2a339b264157813867db4d2d6784a82d871719e +size 25389 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04371baa44e734d45c5c837e2c62fd183bdb052f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957a526f0a059b2040037f977c1c0004332fbfa5292b3155f2ddf07c06f705a2 +size 39467 diff --git a/eval-results/mmlu/0/ckpt_327/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_327/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf85a816ab762bf00436afd8fe20876a5bc13704 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03f00f975c035e2c2801d72c051e5b6ede975e529e3934e6210a8125664edf8f +size 32911 diff --git a/eval-results/mmlu/0/ckpt_327/results.json.tar.gz b/eval-results/mmlu/0/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..347c4e1a8f3394966190d54174f42b0ad69b27f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3521befb3e63def035f10a19dd3f81e10319c59c865b4270d41ed756aa21404e +size 7596 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0817cd706c74ff63accb4b5ec1cb964a0a204973 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0579b0fbaf5437e4d202056453c3e66968fba8a7a72f62e35ff98396358ef308 +size 17068 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12ca61217959453dad40eca1772891dc9c41c59c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b50dc830cf209f9be33de6ff3d75d477a4f504677328f78ce1f48d4a0b605e +size 29851 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4adc4cc5202a925be82ae051851ee5884a30222 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:125cde4c3f92cedd98624f24809a504b357b1970ff89d72366cbf52c2506f203 +size 39840 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9487e26dc0554e25bb1854f75edb2c0162be32a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a0b657ccceae8aaf46a64af4d4edb46b4f2f89d964629e2daa61d31f77ffc9 +size 26763 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae73c33aafd7ba41eb3cd310295f40f934f73f38 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd8ed0b115ceb4eb7d096325047e67a31acb011652c34ace99c0610bdc47432b +size 61274 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6ef454eb52a032433c7fb3304eb42048d683c42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7914443957d56a58b74461d042597039e0d27aaaa1d65d2426449d40515b0ab +size 40416 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bfd1eba8f80c68354dd7deaf408b39be4a8bf4c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d8f2620f9c4dd29b32d3218ae73c092535459b025560539024f832a33716540 +size 23785 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6baccbeffe38106cbcbe7f52ed9154c93262969 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c923a584091689a338887533644309386022557cee647b98997e760a9612af4 +size 31127 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b79aeccf04c6321f502970d8b501f6795922006 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd3d2539b7eb815518355981dfb53d74dc21a5d964cdd0141a23958f8668110 +size 22946 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9c370d910586e44d6d4cdf6d8958021df6632d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8121dbff81512bc6728033deeade0b88b68486e3a2d15cec7e948e663458822e +size 60945 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..812252d33d97b2547a54bbf6032b88f15c7b5fb9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e83e3a1f6427647dc5e9703f0d0d793f9b155c0aa2ecb1c57e850f7f3fee6b7 +size 25743 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7a74526f3af6834f7830edc86f82ad1bb630e1a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67202ddd35cdcc8166b8e5aef59757af421acc0c568aa49483c018ce4570fd7 +size 25766 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8522133c9584d6254f5dfdc2cefb328129256bcf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a931caa7bff337f585d3c00e8cd327b307e870ee7f3e8ba21ddeeef148bf006e +size 46573 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e3aae3038e27fce22decc30683d2df188f61cea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de948bc6a056120b428afe00d73aa5094835cf3f4912340fee10ab4f52d3a4d0 +size 31524 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f4033ad47cca41a210242704b206d849bd86794 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce158ecc2fa8b861cca22b613c75e60c649ae4780c33de4452ba1b1eee094f5 +size 28837 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad6173835b6517c588f1472ca1a97420af394388 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e987dadd8574999a3e41ad61055d49bdc6828ace197357ec194e161ac26d49e +size 74675 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c532079214a9a70a8b53f76ce287729678ab6ca5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dde4cfd260e3111d2e889248e611b60d898eebdd1775c4cb2adad08464bba9e +size 30154 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd92b438938aafc5afde8b06b87fd07d748ad18e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a261250ec8b39c787475b564a709c185cd03b2ad60f2f3f01befc69038305e +size 19138 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d200ea3319447be3ed9ef9eba1c04ec212f6712e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffbf45f9736d455957f655a784322299798b6ee28a7f411c4e78b9c1632158b2 +size 87981 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1c21d3726bde62f09b5a1b09f6af8adeb5aca15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41635f43e20282a76fb544535706ba9ec57c4177b8bb91ded8b6f8026f924e31 +size 50048 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..efb20f1acf74d94127b2cf1ed0be71f1ade941ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dcd8d7a754e3e15e72a0baa5cd44e0580a015df2b9417b3344a43b5812206a1 +size 31623 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09cb1b1310e438364d7fc4dbc4d2f91967d00c9f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa2054f8f45424473e599c6f078bfb955bafc3c60e525d754ec8cc17336c700 +size 145393 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e24853d0400e2bb3085f5b9f7d2b3fe319f62ad9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aea769f5aeb9f3e12192ec0bf4a716ad77fbb0c1a66cf4ab4e7297c122c9d46 +size 44549 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e1f8142b70ccaac1b76aee82ff6281b4a5be6ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4081649d018ef826593be5351be3ea88a5696a17d73b5d90ad736b14e385bcaf +size 54331 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b9cf4906efaf759bf7ec54a83f69538b97d583d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06250e7b036863d4b6f3da859828778dc8b2c34200dbb9072d9fe40e59f7ac02 +size 92810 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b4b553d12ccc528e32e069b47be9aa315129fe7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48df8809e88554c6c336e86e520e2165ffbe530f9754c7206973aebb44c50cd4 +size 57148 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ef68b9239be9d81e16352ba127948d718db0e99 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff321a4e8469b55d565d1d86b43ba3f7dca12c23cee9c6ec885827682d9b5dc +size 58472 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a96c1066f9e20ede76e920c15aaff562cfbf3cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3a347d90faee4372ac2817235e23738201c62b879a60059cab3cb55dd533c2 +size 42939 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2910d92047e4658c4ac6bb843b97533a23da9a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e1e2cfc948305337b8a1fbc625fc22488c5082fa0a47f895075bb49022ada5 +size 143793 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..747d097fab50715cf4d4248835d0ac777f88a7d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d6aae30b827b26ddd6307d48489a8b197d36a884d789510a2e602469f7468b +size 71199 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c00b8d98851d7cb439b349dea0edb15b2135ffd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf8b2ca91b4335130bbd1dc8fa9fc132229affe9ed510ac2d31e27582cc684f +size 161924 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8aab04d8679e00e1813ddbaa010474fdbf44c927 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575746083df645917de5067c146c65d0612b6cd6fbcf7db077d7b6ecebab8203 +size 209471 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6920b1af5d74c5f81a6093033cb76c88756f850f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a63dfbeff80e629e105a1feb6c6dfe0081838d7b0b3866a7417cbef9069d0b1 +size 49884 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de00a13663b284064ddabd3b69cf3c9cbd57cd81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9950328be0766ef131735d78988bff2aa91855928ae3a4e059073adc29c9e9c1 +size 31554 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..328c1b58265bbdc9964175e94e9f97dfdbcecda4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed0bfd5676ca02b833331e5db055077a1823e08e1302cb85f681de9bcb099927 +size 36040 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1298bb1a9cea708e92508beeab47301ecf585a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f86ca44cf713ffc2a41395dc75962347f61045b3195a766072f842acedbb945c +size 29501 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a92985073727a558c0baa30e6e8bec435f57613 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdbe8537e612f74c29fc87c9db2da7d7dcd11de3baca3fe3fc3c34acd81ed98e +size 40338 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..01fad46ef14e36ce72c4aca4f5d3a4b806062ee0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db31829a073963b79ddb3b8e876f7584cdc459b179b7828bd1639c8d68322e6f +size 27035 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7cc6d8736f072da675c84f845072492d461f7350 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd5e7bc696c0a001deb83b5bd8eaba7917637b0de5f99cc481b4a4260c0e630b +size 21699 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63f91910cf172e9bbc040fa06eb35c138f60f638 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf7743d520d36830fce327c7b59fdd5988bc706adeb62dbf0b50805667a01ab +size 57389 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1690470be03a6e61782643f75eff1a4eb616867e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b8878c52745a7db7a7798e5bc11e31f769de9e0b8a4e2c5e4cf3cb4be0599ed +size 22253 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fbcb5970689d3fe11d10eb839a2a1f95eab10d6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b33c8790c8747202fb0768461f4ada1b101ca7c37dc7dc5a5a34148ee341f112 +size 169065 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..925ec478aa32d8ee728fdb41e51cb2813ebb5b27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c73e8e4b62a70ac2c473355e25b92515b91d35f0b7a37c898234a1f3a5705f +size 90238 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0701cd9545cc11b8524e2ae1155782cb4809909f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312462ede96051b05b513e94f985a595eb7df98ad71d53efe27fe25416a51ba6 +size 154493 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0cf190754b698de6ff1fa4a09ab35c2fba8d3d8b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c13bbd1b1ef1ac36cecc1bc8aff94b04e67e3c0fa94a21f1e193f80e8550b6b +size 79455 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6177aae3704c7c845a515375a0b20a41a52a3015 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836aa05d121efdc56b2aeb2ec8cfd24cdb180e7d10f68e799b2c8f3e18f20625 +size 74381 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b38e91bd8c14b51c7590abc5599df6bf5905b99e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab82e3783223183ce5da3dce12b35b589ffefda731099d0d5fb3d2b9540c6ac7 +size 82623 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2cecdacea8f8ba47b96f1bec79082c6e6a48a63f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5b8fb9854338e0d33f3fe1224de4f8079a90c7528a79354d82d584e239f16b +size 89917 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7959cf2eb18dea4270e11a411c1bd96e2a499db6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:108aa187b62b733de1a70db418078247a9ad807667d26281567c5247712b3247 +size 1005368 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cad21b86c84c8c44f2dc208fdc807d0c9923b21e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c28800da03c3230794111a08853f4b1d9f1697b7ef2325d8606d3a8858eb68 +size 133094 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83e158b3b25ef61ff5fc4a4f69f90149239d5a3d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26fbe834dbe19711eb51149d73e28c1373cdc29dfb965b6dd210d5c3d6fd5267 +size 182976 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fc7b00009d6dc373f27461b2aa9d6da7b1d2ed0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67475f7c671db0a08cd2db6108f3a4367ef02c89828e82542365cdca54735fb7 +size 27530 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c779e84040b36a15a0674e2eafcdc99ad5be1de8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41409ea77a61a312337d14af3b1c1420d6aed1cdc0b4d94f224f8a6adfd4fa14 +size 120491 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e8764016ad3ab4a208c25c242e587ffee2e2ca3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cd60640634c5057d5e22eb5d8ac35336d05bcd86f101b6b408662c34eab79e0 +size 57973 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8606792a85927287d5833a481a78b78cadad9b06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c137893f71fd8868fe984c9e8bd00dc74fbda45592209f97d50646fab98c4d +size 25410 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b7ca7b57713ba435e317ab9dbcc42954f24ccfa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b672624c8ed34f081474d21678da5dadef53fd8d2ebe59ee8ecc1e128c702cdc +size 39434 diff --git a/eval-results/mmlu/0/ckpt_330/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_330/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f56c7a99c4c382bbce23ab3a7c0d75c0c1122f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b8c3f6024f2880f2719b88e20ec702754fc9b9bff0bf1eda67673942e6d4d0 +size 32964 diff --git a/eval-results/mmlu/0/ckpt_330/results.json.tar.gz b/eval-results/mmlu/0/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2980b15a7fbff773f97988e6a9c6f5b7e92f03af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e69e41aab56ea7c1218c0b7a924c70ec6adaad237d261bd8307598713df572 +size 7611 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23d29107dd57f3a3b0d46ecd92146f3f764d469f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842aeeab64cdc7f89e710765c0b6d5cb28307ae4d8d4bc6aed4aca4745e6eaac +size 17081 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d00e42d57984aaef42e5a8bfe952cd8a221290a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f53e92e32899d55a3c12b8c12ebcd391ac1b362dc7a2db38720f306b64de9116 +size 29854 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00d3ed1389f65c381e10597c9000f1abedc86e76 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8045e0326c72fa0661f2848cfc41cdfd1dc59f68613134a6b767c6777bd5cbb5 +size 39869 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f1e9ce9118c8bad5a6bd4c03a66b1237f46ea9c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81fceb1d06a451ba2e17ffdf7d90f22b1ddda7fb1bb91cde429ad5f4da65ea5 +size 26757 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae1825e0043d84ba1120804192c35f5a34401a67 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39fd598891e1c7a9e1b428dc2be42081666f5167e0860322c733ce2cd4f3f51c +size 61266 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a13585b27958d815c923a401362e92f979dc45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b02d5daf30606a727023b5927f23136cd9ac2a7de37e0c25ee66b10e9d3df8 +size 40425 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2da6ab59411371a85324d7b440f5e09772efb132 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23035aac9bfe00ce80f35f0c09fd36ffae66770cc3a16fa7fb063c3c7301546a +size 23790 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1036ac295cbdf830d53f4397a90d20ae0682b576 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a718206e54566a2f8c9a6af3dbaf617b3ff581e6cf7c20c5f49383ba0b495b06 +size 31125 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15f455aac89e7a1141755072c685a0067d3c9e06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ce279060e1755e7180594a5a144e2b9e247a0824ec8dbe6c8db68b45791fb73 +size 22954 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66225d1e3cd19f298cae807b6b88746d227d1d43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7402565c59b0b067adb2f760cbfb3a1a6b3cc207e1a8bffa1c7d13019eb8495b +size 60973 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e98b78e1ad8ee6b627b24d4d94ce644ff841a83 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d99a81114148e1b07e6ccd8004c8a579b1894848ec3c8831d902941b657dd7 +size 25776 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24da2861a40c6fff448cf4a401604d90a687693c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db70aae96d38307c686c409694cb0e9caf485507c490ff3eda93d292c48c9497 +size 25759 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bcd1d46dd68183f556fc2a4b2363031779237105 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14773e0f18c37548d6478afab4eab286a853a16a4d1ba6d1a415783b6cf2055b +size 46576 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17b738c14cca78aa722d4d42c1a194e63e469474 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b83d649cfd52ade0217615ff5c68f80cfe70d371b87aadbd53e9cb4b5fb7703 +size 31501 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2d0b8cb48ab13f6d41640d14003b982f34993e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5dbd0b1f4bf6a8d880a6e8ae1f8cb14ee9e80aa779db6b0230b238b9a5038c +size 28795 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74608b38c5fc049005af7af5bf237e97e8b5f4a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a81d6719617228d242e55835a0b01c564f41c9ef1259998245291bc0b0844a +size 74813 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac91d71cd3e849060526a81417a891adbe7298cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bce5f0bcbc77e180098d54d24cd7d4e79178a82ea0a853f80e62f44df5f5ebf +size 30137 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb44b57b298f5bdd6f30a613f0e08d6daa3c76ad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d8c339120c49ec92f16d551e3b436cf6218749f0256d0d7288107304e8be370 +size 19130 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb97ecb04b40303de0f6a7d9caf10d8f3745a251 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c247fd88527f1a3bb2feca129ea21be9c06c2ddb803bf1ce4c4c9c1d4be2660 +size 87969 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f85cd7e07bb97b4ea126e0204192b1261fd53d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df55bb01ca3a8f0acf28d51d049be33c41c4d33ab07f4e8c101a204bc3d50159 +size 50040 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d50d4f14968e98bf8737b3a80fbdfed96f4d82ee --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf814415e045baaaf222dc5fd319ef2ba16f816105ed6f2ee4c8f2d2db6bc99 +size 31614 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0b008a87ddff94bea46d2521d5a2f12fb9d21d89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e49ac5a19fbd5d5dc5c3af86c3f5fadd7e999ced3d4bb4f6e9d9cc8519b2c1cc +size 145462 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea2569be6bbcc76ac1972d8337efd0cff089daa6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ca31fdbd2dc17c0c4b856dca0256a1285edbbf5e0b4956bcb14efe24f862e88 +size 44560 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7bfb6cde87c6ddb91473465127e2166369f5e69 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc32cde1ab20c272c280f4741851a3a402f30a4fbbc0a6d3e53ccdb0c3c0fa8 +size 54330 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afd7311ef6edc5795db7ac261e193442295f195f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b99a41ffaba7b97c12e3db11a740f78468eab61297c0cad8c20450e28f7e681 +size 92829 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36ca8634dcb901b40c3c215dec0652d822ed4b75 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65e76279b16310501721430e1641deb6814d52adc5b18a9b16b444253496be86 +size 57205 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a39264f13877a7b2ed5f73d15a68e34876032fa7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19aebd5ec4c4261cdbbf0ac3e9a27b0422031256ca86b609a06a4e655a6bc93d +size 58442 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61038536ebdb3093507e7c45127d150ac8a6fba3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:410a504ff367cacff99bb084cc0daaa540ee2b397b5e2be92b21999c833ef7a5 +size 42901 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e4188c117e1d149c815d13f29c0292845d1c351 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1306c6c483aa8cbcf31e6d4dcc2e308844195872529ae74b171b97af79c2d0 +size 143810 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e4569239822e4c7ea112e58de7372b4ad112d1c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5580bbb3d9f8d3ed365368d600e6ea92e56fa2ec98e9df0e2a8f34adf0f066dc +size 71175 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..44093e3b2ed44fc02d27bdce7bda2da21957dc6e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1589bbe6395ce1958e6510ff283ba5ef9db44c6409fec04757c762559b2a68 +size 161957 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d657bdcd0c25e6b685f6f2c827bc5f55da5bf544 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c43f05257b241f086e17d23eeff89bc3eae79bff6dc77dbf636aa5ebc4bf84de +size 209412 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5b8c52fe69aa5c8ad0afa1bede16c9be5f123cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5be4339ce8ffdac1c615a7b4a79c56e4c623f0812208ecb22833b2c47db7436 +size 49921 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a9d6fa1e589206f9f3b4ff8bfcc92b2bedd4722 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb653dea45d4f9a46b9e96bb756b2812824c6d00b4c3782a2afbd1dec3d6837d +size 31528 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1425b6709a8d4e9c0a664ec3189199b88c962614 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12abbd47882f9f0bf4de6a51e631771276a6eb37721c110921a5766d3cedf0f6 +size 36078 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da793b19d286630f2b9e60325a3b4a2044e9214f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074521c77ed49b902f34975f01f7e59f19f482a90d2486001afa0be92ca0b717 +size 29492 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3a028b2b7ab625eec8007448d6fb60222d890a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf753dd09e3a35f4c5e0afaf311d3d40f78bbb79f9bea23b7dc0eda030ad30c6 +size 40352 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e7b69cb3df54e226d85852474a1b9b00ed610a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21efb429e97d2b6429a1c889a9b4763474c4624ecf37a4c95d0cfcf890f5f57c +size 27066 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7dd9d8cc9e11b059ac9658e00e5c323ccd9661e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f546309b3d7eac2f4bc61af38802e44ca8634c94db1479f6b49260a408b7b461 +size 21741 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53af3213e7507a28b6b5c6041390c53955698c2e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca54b806b1f854fe894739fa36ec7152fe94ef5c40a19025e62646d1ef51992 +size 57399 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47f8c037938ff4a706c19c6c7ebab823de333bab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4086ecee5800413a992ea449bfcac62d748ef6bb81382b7304fd17dd1059246 +size 22271 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..619866cb5fa5e9cb8ad42dc3a4f3ca076eccb3bd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f921f8926334c9b35338b355672143691c61bb2464292e518a97848b6070685 +size 169053 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96ca276bdae3d361d71838dda551dea7a6b94fe1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726dd685a61d27f3fdc58bbbd27e738ed21ea9a11d2ccb9562f6ccb5e60ae70f +size 90311 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3183c16ae4a7d1f7dc1c2eff514f2957944faed5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:465bf7ba6dea1ec85cf1a1c2d59b3bdd1e1f034a4730e02ad001cfc9186ec408 +size 154338 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcb9a240838c4967855c31e7964a5fa464089e93 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ec03e94c6d2f3fe1460e6466bd4dd2ef73340513e7ddf46a531400214630697 +size 79441 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..485c547b5dce20c15a386984dc65ea16a3aec595 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3a3507663f356ccf830c47625e3bd4d1a4f97147e86dc3ab9a12fb644c23a2 +size 74351 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8a70e0b7b44e8610cd63eceacb2cc16f8691674 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75968ee5cd40a296b27e2e95e145ba8a340343f6a5e716cf17f456648880db28 +size 82493 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1d87f4a718f11697c3b8d871128a355bb7d9bde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef439873291f42dcf78dc96c005959c20aa74069fab046691e582ce48636a26a +size 89956 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eef7e4621d7a7c096122a73ddbfde6fdb831439e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2748b8fd7b01c00258d93a1b0e9369d6357fbd3a74e965552bd09e78533d72 +size 1005742 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a20a1872093340263ca3896766b7cc3ed3415d44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9205504017ddf13c5ea6d7365f690cb2fd5ceb2de1c3229992f28ec7af3ba966 +size 133174 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc2b8e2f6434ae5fc0796057f8e4a458fe45b8a9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e66dcd5064e1a0531eeb53be386d1d94f45489f2e607b26338099c41fae577 +size 183118 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ce3867219e52c17a63f841531aca74d9da31822 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79fe2e809915875dfbf6e414ff85393700005462e40f77910e082cbcf45c619e +size 27538 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e0bd64bf502607989177f6f1f0028e88a3dff37 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea41a6ab327490763b0976d66f64e05e4bbd4d3bd28fe1d60e14c69a8358d0b9 +size 120506 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1f33a523e2d205e001a91cebbf53b98d5aed20a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e49bc409be6c3de0d436ada19c6786f24955c14251f6d622785ec88b9264d21 +size 57974 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e6da4cf04635abae0c5329b25ce9fb53a877b5f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d0ddf0b45d36a5d275b504cada80c371036343874ea997c95a655a3b5c9312e +size 25417 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a47863f663afa7bedebab7b294b16ca65e271e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68da3bf533882f95f5adabbd6bb35ac3d331465d42fc5c1c03cf5e60088c266 +size 39451 diff --git a/eval-results/mmlu/0/ckpt_333/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_333/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..623c8f50802ab26cf684ffea51f2ef353cd39d6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35557ed0f587f25a730884d28dfdecfc6cc4a20c79bbcbf0302a15b6d69685e3 +size 32933 diff --git a/eval-results/mmlu/0/ckpt_333/results.json.tar.gz b/eval-results/mmlu/0/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e17cc5a824b9899de66b165a85af98c46d2437e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93cd56498ce76222ce9d90a0af49201bb2e70d51c1eab755d3215660f1ae2bde +size 7596 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7614fb67f52d08f30c6cfc0f80e428dd188ba7ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22dcc3b48fee54dcbc86254b7cf2168c82518ba03ba322cf319972ec3242f6e0 +size 17087 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a917b6803bacdaa8d0222fb07328cb87d23679b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02cb39900aac9e5704b1df4ed47ff37f11f2e5d6ba505299692242b731494799 +size 29802 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecfc3ac51e04731a3692d572389219c4a9ac6846 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29cfee0cd3578a1967e9d0deee6fdea2351aecba6f5b09eb88af880ab7ac3b0a +size 39815 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c71c24dc4d1d3264ef9c16e2ef17ef768d5699f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e44c4ef79881df103470ee84a03e935249c37d90c8d02183d1ae2f92890576 +size 26770 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bee1a60b0af25110ad874d32627113ca891cca2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896864af496eab10617bb5b4df51b3a778c79dd47c4be04abdfbbad9ce1ae023 +size 61227 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32fff13784cb1dd5968707188d22e911cecef2a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2870911118243680881d80d5c735a2dabc6826282b3892596f2003cb00517683 +size 40383 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ee860a6239399e8ddb3bb2389c9ed0a07a31313 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b75a565f8d49503da18e7e9d2a67fcdce86f014647587db5641cc22fb1445445 +size 23806 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86775e9ea2aed6b2a5227e8866e3bfd70bca9a20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aeafe268977fc71f89e90f664457e76649e8260b81c19dd407f082e7dd0700c +size 31143 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff097ab0024a9f2fa987459767b4380d20e89ce0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e6eb3e77db83ea2df281b854ce71ac131ba26836ce58097ba9b586832947aa +size 22903 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc5cbc09ac99f2ea69accc3eca5b7f8b758bee40 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eb6c0c329de06e87fac475f8bc8f74240d6ee22c3b42200b9e6e5016e982db7 +size 60989 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12abe46d5685542fdc8e1e4e8678c70ce2f43dbb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c0adcde777f229c974400f264f8f9daaa11153d0a346bee04817b74700240d +size 25731 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23c6d199aa505bd9c3c8dd95eef5c975bac4f399 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458d355216871e8c420cbdac95b3f8b02967a34ba5362551f073c4927104302e +size 25787 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13bd09b8f5e53b0878c4ab44a230852f5cb683e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190b6e20678d1772586456f3f539035504b4a150962555d8deffd99a253b5d1c +size 46514 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac210ce201a85c40d6e37f959b05974b8e52d0c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a075e2d70d0589d5be99f2e9c7b7a641587eab09e5d3835546834b18bd08839a +size 31482 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e0b740a0f846f889fafbca88e2ba64f7f8213b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69231f3449b77c9b4c94514425e72b2befa07fe04afcda3b85f99a20c14df51 +size 28785 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c27c53d4a9043455bba338bc654770773313eda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c98e5e8a214c656e742d2f5e3514646b2cdc8a7f497a6b4f430cec9990345cae +size 74720 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3e2165872facaab80a8b9341a5c290c1367f630 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:040576067de9c6ff3456793c695adc2c37a9b1662bc125713444d934a8462c9a +size 30106 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2faa96bf0c53aaba4343f8b5ea3cb1ebfb8dd5d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4bb7432fa90be51aafd4289e3a1763f93ca7e7eafd8e52901b3aa41099a4a48 +size 19120 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48937ad74c49896d412ecd1cc8c5af267ff3430e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77486f4450a1abc39728440b0cac9c061a388b47f852c2846dab1f4db49128ab +size 87921 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a83118549c763ce9148ca4dd6e903e8fc215b4ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e857688aebf11de154e11fe9a065de1f76544bea28a30c7cde21c0ce6e348fb7 +size 50037 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f9efcda5c6a4bfdedf5ebd0f113da05df1b483e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f16d0a519f2a311791b82f35ab40b33f5d85b1249da269ac7ff63643823d60 +size 31612 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78237980d6f76db55b3165d5a00ac6910ca9877a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0855dcab7f093a856fa5669af650b5bb7466d6112c0bed6fad6b5082cbd5d4fe +size 145462 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b04680db2a856c378daebb6ca24d0c41df03886f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db309ad6012062b005de3f896cc929a2c1011da8c876f41a9f11a156e51cdf6d +size 44539 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10cf653d9ae69e164745c702569e5654a35203a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2089c539713dadeb8b97ce8df5c6cb2c9272d6a6257e2d4988be0c027eccd3a9 +size 54321 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30d7c05f29f087dd203814961987c9c3c94e8739 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f623fbb16da33cdce915957be5e93cfe8f242dd12105c1bbdc8fd1c6187197 +size 92767 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ee325378b6f4acc8a89d108401a264465deecfb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeeabbaa49cd7a18b34b2bbc73c7bad0384be505fa6ba9fbf36a0c7fc943e8b3 +size 57134 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..925f10170f3ae4e5f391fc6d625b30cf6be4f393 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df048264df4fe2feef5e6876855fdd5e50d3047dde7bfb45c3be81c07fee889 +size 58446 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..25648654d5e2798cb76961a1ee1851fb96ab30bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0097ff066f188d4d46b1d1bf5a198b96670d774cf1aba091840ae1adae29c4 +size 42861 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14791a21742d5c726513899f50909a5c71e7a8e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd34db7084548522f20e48600a2b357737ad672b232258fb429b0646c915e2e +size 143782 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..439acd95804027ee7675488d439270756f7797e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf0c7e7db870c501f58eecb4b4f0bde6e87738a8ba10ba57277099ba5d18f56 +size 71240 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c410304d4c0a6ebca0f34b1acf7cbad9a7815f0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e87d27846fd6bc9acaabc8d66dfacb4ffd8e4481f2eb4edd414caa9d48409b00 +size 161941 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..505868beb6072e1c71da3503b3278c0dfc88138e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c75a074e8f7682170e1d95019d1a6c9c283c236e271685edc119f9a68b369a +size 209443 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b54d0191a91c79fb0997f6bd0369703214982515 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1966317d46321ce171b62ba5a1a390b265606f3deaf3076f851dd5201c901b2 +size 49893 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5dc5bc6fb7b54dda49e023c8baf6cd92b914d28 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfe768848ccea4914678f562448b78be2503e9f07fcc56674b4a1b87e2d83fb6 +size 31555 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ed6d285a46a47d2621fb270675b697bd3bbef35 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a59645db9134c717767214a4f5fc66f282e50e05eb30814437d85807fa03f37 +size 36090 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0b0bdae0c874de65679be09269631f3c06fbb85 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00361db7c155f77d7c93e83eb727a4a9cc22c37c76891a5ed054f53cac3d3467 +size 29509 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83e43df57330dda4c0d19fa83b7f90e48e743e98 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff47bee975bd4fd800e982786ab829a6442020ccc837432d6b45d7911b0d073 +size 40316 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..792c71da38b6a69dd35cf2ad0f937ba960f46043 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a03c30cfbffa47816d77cc77a074b9cfa6b7cf617e5bce47f0424e37c64db918 +size 27027 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2ad4eaaecf982214a5e09ea0d87bbaa49d8fcdb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e32ad5db58929a30cbf25becac10c8edc26befa14fe7b9c6160e139f89cf8b15 +size 21685 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..559fa7c35ffc89f7c616fac7c380ce5e80df529b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362105397cf2098c59e5add17a8a90c57227a7d7f994833e0d4c0e755aa44f0a +size 57388 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c663bd4775a6ada411133eae99f31c21327b6dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e4a7d4d05745d4604ac889b29a6399d8589171ac59bdf81571c22eaba3dd107 +size 22255 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c0f497f0d99a4ce8a73ba461a78b0f2d52e5c96 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14599756a2b95a3595ab1030dd5177f0d78c13fb174285841d4c935956aaff2f +size 169062 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34d2bfada868e4096253a113c60d62342eceb627 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d673e5b54a34b0b49f6f19b86d17853f521e1279c40b78cf2dc68c2c41993c30 +size 90259 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38c59b6f4c51c3a60e37506cbacf0d8fbf2c1d5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80b15244f9cbac5cc83634403175d428cce9d390b0070c24b5a3533e2db6f96 +size 154184 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..604aa45510748270cc6547409c4f217d5450ce30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ed3d5127c69c3f8f39e9c1800fb1d84d0c1b2cf9eabbbd51c43a8e3fc681f4 +size 79405 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c308aadc45c6a022d2fd8af4d0935dd3717697a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52d2fcfc296c461c3511a79fee2c2f14137ea72c77377c9cb0bc693322cfe7b +size 74326 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7f820e846a40ce03d4869a3c1e4d9f2caf79a90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb818630f33d323d68071489ca22c7bd49848de6ac9fcce3fc0eb79ca5c1c92 +size 82469 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00e850ea1840f0f48aaa989bf4972c039aef5c89 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab96fa7d8639b5fd56cc7c0aa721c85c271ea4583e1fe57680cd399cb51d9893 +size 89907 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbbd57d37b7d729cfb2e189dbab162c1ad47a592 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e26c5826fecc5a4d43690d22bb99c331b10b904ef785a48f006f7a63ff05318 +size 1005287 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8791ea5e545214d9b4c5e5f9ab08b1945c9d8550 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4965761458b8d37303b5e02c98eb674fab8d93dffa63b79890c8e224ded679 +size 133054 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cea19e3022e77dc55ab961be277a979bfff89c0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79451c6fb54bdddd5e6431acb739658b6eb8640ed21439cce02c4aead915ba00 +size 182981 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..353fd53a402f1228469172760be61523f8599595 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed0ea455c3608a02eadfd95126eb2aaa5c13fee81102f1d7b76cf8a1652afd0 +size 27528 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19f2498139ea2b1d665d18ee8037784b4f5e5e55 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df521a1fb0c6d1cd16a067ab1aaf91c0c0238ad77c20154351914b0cc2b3b730 +size 120405 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d50014d5ccc55b751c12bb8ea3d43202a6c91da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad6ce8077fe0376533fffc06ebb2cedbb9c22a4d113d4c8973d39081b72b1d8c +size 57940 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2dffee6fc142669dfec9c6fa4bb677f641133bdc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfbcbc65caa85b4e9f3e485b4806cd720950c86eb793cee93d7800f05e4f3ffb +size 25393 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c66d2f5260f779c2af7fc6761ae2866c42153493 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1310c84d38e12352e0a52eb83a37036ff019163e21d21237eb81c5adac944f1a +size 39411 diff --git a/eval-results/mmlu/0/ckpt_336/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_336/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c49707b62c25abaa18c82838f60fb6d8668e123b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79aec6e5beaf9932483fdbc2fcba1e4b612d3ddd637cd40d2d36705d4584c248 +size 32910 diff --git a/eval-results/mmlu/0/ckpt_336/results.json.tar.gz b/eval-results/mmlu/0/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f27a9db20ae4bd11f20a822f11a0cbc6dc3ccb3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e0bdcced5fe277473ce51483198d4cb9eebf577b07fc6b477451f6500fa724 +size 7600 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39f5ec3cb2e161964ca0cd5173113fc0655b7e1d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fabd6682646aec986ec2c1044969edfb5287c91ab4427e04e5a5a95ccb12347 +size 17093 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d86fdcf9dd36d6746145bf34ac15af3e2bf7d9e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b59ee2f3b8f5e073e8895fd7bc1e0a5dc3e122ec9a110272ea22c5fb0498e41 +size 29860 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a07f3e76ad2201852b6f70841a5ad9d7b0f07a0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ff58e2a583b81aea7c9c36eb5dc237ceb3a491faf162269d16d74f4c238d77 +size 39881 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b45b6668212dbc03d3320051a6f41d5764c2af9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51fa3c03204baf07bfe6171610d94388125a627142aa9e25bfc75b88d9c491ab +size 26783 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f51a02e70996c655114b488ffbabb285525d423 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da0e39ac9ad3952d0346efcbf596310bf56cf79c3976299ec5485eed95df698 +size 61249 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43e486ca29e06eb23b2afb721d6a6c3d1522f1f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57c327682821eacdbd724cb93c128b7be64a70c0c7f158f2219b939871e1f5a +size 40390 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3eb13ee964981c0b1dd7083cbfdee9723e0126a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd999640ae2571d7d22aa2944bb758b2624f01bbfb32db44261adb1bd7cb3f21 +size 23828 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31817db75c39e94924c4a9db730eed740e15cacb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7f0a6164afe999afe0bc53cf81c8bf385fdf79cc0a611e34f6b932c421073d +size 31123 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b46b30f1a72e8a8b9e19fd83cd76e0698419e8b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec856ff98d58b3ef7724aa0b14121198f1c5ffeb5163f1c1c762321b64608622 +size 22943 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1f06f806f434179bb07c9961847fb548e79f905 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bf107a40d3ee85bf8be11b5b0048084f7d3e57fba7f0ad3aa002675eedd566a +size 60961 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1d74560204e165c5eec70ed6d66e0bf70a2b6a1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b1248a58bd84fa77f3e6928274ca0d2fd1457bbec803c1393b741ad3098b5c +size 25740 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68a12438cfc0db1dd71818d214b5cd7699347df2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099fad5b6774c1440dcd3196b04819bfbd7f44f95d3f19305ca655afe104de74 +size 25785 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff93f84d37d84020988b9545094cefcb267b5c53 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ab7bb25a23dbc43a35e58c0f8f9b5c69c5dfc7f85e4d4a2d7577e6ba6582acd +size 46567 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c70b6e71f08acb23e3fba746993e476962553384 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bee09ef39d88645a1b27f045dc0789dc7f4c06e6c70e9e4f9eb21385d70e9394 +size 31516 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..331cb1b945e787f458f4afcfe5cb4c91adc9627a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa3d218d08be11ba5447917c02909aed4a58cf32dfc16ea8448221d68f4006f +size 28845 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b417bcbe6960f61f2a7389d5165f27badd4fe82e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0327d8546e59391834ca38b16670e590e94c9d390547eba5081d6859384e160 +size 74796 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c4c972188a177d9070f879685d797009c3d1a4f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78fb12a4db48d8a83e28ec4e20b6e92b26a8ad92e5ac49746160d96ce26516b4 +size 30146 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f3709b71fc7e6ef7b49bbade8e812d6521950aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04cdf085980823ce238739b1d27208cf07595bede65c3d50eb7b3d6ebc767b22 +size 19102 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1164f761e0f08524d9353ec1c949d662d247069c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9934f47ee2eb7d33bcee2071b6884e15d64fcb6ab45d2906b4c1c8f6795942f2 +size 87922 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af8bfc064de9f290528f9a5360d4fad5cc6a5055 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c9d906d1d9ceb6b34ebd7a106e1f0f5b72ebe9a416564771325c467447eab45 +size 50103 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..502c95ad099970dff4af5eee88966b78c9483863 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a05272a89b9aa51cfedf6804fa7bf0e036c7dd42f0eedba8804f63092c9a2ecb +size 31651 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9fc3b41c739a002251d2739d747eb2becba2d07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3512622363086633cdbf58d8980f457ca97e4b2354dbe5592b6d21719e483af0 +size 145394 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93da3917fe057aa2839a2f189f092910aa50a191 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:105ddfae61a04376d285ea8880ed5ea0d25134475342e5650c0f24c47b9c284c +size 44552 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0fc34bf14f1e527dd786d68e1f1d0e68940ff834 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01776674f8363d972180f4b5c4c5cfe85dc0c353deafcdf72a4376e7c544fa0f +size 54351 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddbc562b3f543b69412330d53096698ab322408d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf1606446972a660f8584c176aeb9a7162eae90ba5e3eea09b68f86e80dd63d +size 92819 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00bf8ce2bace8384040429ca49283ff7d96d50d6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b6c678b5029c77c2d4ebe293e878c83c8220851548c72b03e026a1d2662c39 +size 57147 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7cc460cf91ecb74af47dd78c64d68817941c50ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea3b7c92c0a986aef3ddb5a56f2ba3441847a3dc1c5c33654af54325ddeb4cf +size 58456 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..465eea2fd111163180af1a74f6d61861949a21fc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ef7e5e4af623a10542264f96558a26515e96e7abcb0c810e3372c5447a20a95 +size 42909 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..52093f6bf2ada5d5e2f41e55e5f5199501cd5800 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c30f2ab5ab09d0fde28fb06aedae20785c5befaff7bf2af44b05a95f639ef19 +size 143761 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10e12836c0e250ba96be6870d6b68da3ed7df078 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c4dd82bba35289cc888dbcb36e59d0dc336552a25311afe1e2fcf677c449bf +size 71272 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c8daf83bab4688852b6cbba2f8b69b9396b10b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c70995907e089a7f56933f0ec2ded370f1821e802babd9fba0bbca7fdb74b6 +size 161972 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d31c8adc980d435c1e3ae0146cac00f34028591 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78672241e6989e553a0620c7e20a3fbf43a64c6dccbee8e96869c9ee0d2511da +size 209481 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f35f18ebf52c953c7fa7885299a608e6e282c22a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b2fbdc70f7bb45b34c1bf53afab8ddc5bfd281f375f9226ddade5db7ef29ca +size 49990 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a7802e6e1ca12ea23dcb2f084e3e24999979be4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd67a14b42b9d09454d5fce1f157b43a35511dcf7109a0522495a0096113816 +size 31528 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96d8433a9c0b034f78d153d712a586773bbdf15b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39cf13c0a8a8b49b33336cbaef1db2dec12bfa6cde83e5e5f1be6e2e96ec02ca +size 36051 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..922b95b2474a9767bf862327be2be75fe99f9748 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1c49e507410c7ee9ca3e4de1dcec634045132e9e261b431cc685b7273afbcd0 +size 29480 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d413abc83729ba6d3dd44e8365a84586bd560c6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e429212c47d296db443650f91fca83c7cccabc2eebbc365b9f288bcee796b7b9 +size 40332 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31a0262ac1b186a578355e30640e68b0aa641399 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b14019d06387a7fb8ae306a6c4bf45abf0e63ab6917971ceffbada577e7383 +size 27073 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e7c24cc5f6ed7f5952230a372865f3cb4bfcb7b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:294c52991362501205460d4b998dbe6a2d940c4509296ff14e9d99b639a20c6a +size 21688 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a5e0e276203490c337287814f1055a6ccc2015b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66bd363bc1f89a66687c7d075d725553adbd3bcaf2f7536ba505d6b55ec4a81c +size 57341 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..497d266bfb7e8839de592e99fff5f1feae850210 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445d546f1c1ba9ecbff771e6d89fc636ea4ea1d12812b99879b8108377504503 +size 22254 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae602b7cde736053d47d4fca23729cd31e5b2e45 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e2f9379522c1f1bf46c40b2ff016c332e689f8ce5f8c390805cc100d30fc8d +size 169135 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..900eca8a4fef84ede6eddc9ce94acb74b2e0cb0d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eab0f495ffdaafb25117d157fd39ed3692e44d8db3f8db9ec562d6623ec4b0d +size 90318 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..583f2e0efaebbe1e12ef43993f2424b5ad70104f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:891498121a751606ce325c647c29ceac4be49e9bd11390095a7b0117c860ce99 +size 154099 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b27488a4e0790e7268bff25dfee97178ae99801 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b376746a98b44c6f25347ff0cd0083c767823e08e5e03b614d7463ed040de957 +size 79457 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..202cf65c4dcd1cfa36664e880a678473801d6230 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec74c17d69485aba17b6d28001c44892d624cee97f29295d8e6bcffa5916736c +size 74399 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76ab8b6949ae83f0275ff423957eda2df616151e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39de8e7e4bed7aaa0988ed7f2afc448deb688f33c3eb7d67d67b2fa5eccc633b +size 82547 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40df9824dd2a4cb8b4c35d9bb0ee973c62600a0d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cda722876f1a805539f021e233e97e0546ca4694e9d5a0a20efb5a92172f88b +size 89968 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54de1ecda265e2295ad966dfb64a9ee3a97e91d2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f34962b6df22056d33b48f019a47407914dfc5f9d9dd03bb31dd0521294055f +size 1005887 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0646b1a6e94efe46a9803ad5dc1f4ddccc778fa0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cf8256c2853ad3c60a689643bc757644cbacc8ded3a1313ccdd714ef86ae2e5 +size 133152 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..baeccfe5f9d4b269f97545ac73f214da03068a43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62716b0ecf6128110f53158560d5b8b0dd9a44a6ea0ede3dbc43c55ffa4f40c +size 183140 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a15ccc086e5f78509c525da1451c146a396e06d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97df7e7fc6e7708e1e89d6a8a99e9f5d083873da4d74e7a2acb1bdbac5b4753b +size 27547 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51e822b1ed2e97a8bc3e98c790765516de7de5da --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84401a16a13f3a2bcad09cc3d7ed385dc74a2e13b715141e94211a9eda2ae26b +size 120451 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..06143542f7d1f44c60d65f2a71c7e87cf148e068 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608f3f4a044edf1d5af33b9c8425d3078da3048ed4e7bed83a135d6a84f54e99 +size 57993 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e0696e13af85e1c938dc1955c97229102bd7d4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c02498396c690f234e460b4faccc02ccbe57a127cb87503d0f3bec0497a1bc7 +size 25400 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd66648b1fe0f2ad718163f32985cf8daabdeef6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74e54fbab25626aedb96398c929bb00379175abfb75a5aeaca75d245db6312c3 +size 39455 diff --git a/eval-results/mmlu/0/ckpt_339/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_339/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11f226fb01351c2b15552e19f277ae5e792383cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7190be5469b0ff0cb8db2fd7600f81081bafc207c39c702e37ae8c63cf7a0e7c +size 32930 diff --git a/eval-results/mmlu/0/ckpt_339/results.json.tar.gz b/eval-results/mmlu/0/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b8314c1f8ed1b6a62c5f9ea3403dbb8112c2d28 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7631c409dc0031fa6fc4f1e11dfcbcc38069fb5206826ca37ef4836abd6287 +size 7634 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0db0c668c6d4351e6c82456e9ab0875be1efc69c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:810b3156543a02b86a3d94262063e2e6373f8544e4b730bd167ddc8209b30c3d +size 17091 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a4da1430e06b771c10b64070e5b63a792cc1a1b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4b1fd54033aab4509dfeb542cbb988488f7f286d875adb792cbf5e45498272 +size 29833 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1153316818ba64f8fb93a17d31594cf9a5a092b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd087ecfb6267e97dc43d008ed656e446a208648b5221450b48e807ced2ddbb6 +size 39843 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f2a684571cf8766a811acda1fa5040ad9529258 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ecfcb99c996e3f7ba6edde8f0e18a996ba84448ce4e39e162c550723739d814 +size 26758 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d09fb92eb683de156996cc04c91b025f9974acb8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92cbc0b8dec922feccb3a1a9e44280c68b491e7c8a7a3fd6745c24438c3809a8 +size 61241 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0eddb93ea10f194679b9edc274e9fb122a57a21a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e86ff5d91302575a2e4d17e986c05d645051bd71b33b1ec51a6fbb6daf54ef +size 40391 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9083eb3a3258b7d3ed9801256df5c1f175cc397 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cd55d9a886e8f914885bbeff8201026f0ad56106549d2cd90fee7cd6434b20 +size 23783 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d98d5d9ed97a25a962aa07628ed4d7ddb1e0d48f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee3d20077a49b5450b86011efce1941560a2c9844ed6a5877862808406a3ca8 +size 31137 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b394e5ac23c39e2d1f67fe24a2899c75048e05ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4959f2f03f97cdecceff548a60f81b7338afb08e524bb1e50ac41c57d823b6f6 +size 22933 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..820440aef98e1d25d315769a94fc79adffdb2dfc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:357c90e2797278f6fdbceeafe46c1d143c9ff055c8a94b97108d61b5f23b1832 +size 60902 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..184ed06a1e2e7c24ba20f59e795512aeacc762ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188a2e7647b10a538b80cb0091af88780d6e2a9e5fcd7334eb213204a1d33bd6 +size 25708 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb0537c92585617122cacd47d0c45b44720ded88 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32f2870c041fbc9c8c43e0052e649fd21d8245ecd4b86abbcf59fabaf6ea1fb +size 25800 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4274cc7121f55682db0c8eba3b1187d47d882842 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3745f20c8f92d3347de66c01081a564d01c2dd137b27b1c237560e2803079909 +size 46550 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bef09e0edaea39c813732429504c528488b69772 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd43b4be89e39b42c9263a5076ae152d40dee3f7d837a0279bbb9eb28882d5f8 +size 31512 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..12ac049f785a83529d1b1012538227862ee77273 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f14485ba83199b6dafbda3296c483e46b86a7368dcca33e8001c80239f0f84f +size 28817 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffd64fb3e13813c8cabd248cf04e4458d1a55de0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7034deac59e6f9fb3452426fb1f546ea3465c353409384f3f1b74a68ef20714c +size 74713 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..762d18b6c16014308f44f75a16467cc5ea4f28dd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1dfe1343e92b54e220598bc095459592936c6ff0c16e4c1993b10b01d490a64 +size 30140 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..17bf2502961a4c01f2f59b3dbe4958835e213dd9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1c68b13140a2fdd54080676466817a4eecde33f8fdd8de2f8e3e07e49a02e80 +size 19131 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8689b6f88457c59bd4a2255d180496f8669ec1a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab23e6e0e900e9d818549c956d8dbf9d109648342ee7c9930bac6a1eca95326 +size 87915 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fd44cc92e1f02c2646d07b3cdb07f684416cb70 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f755a05964acb48ff113f1a43cdc281d1eb87bb7d15310a9f67173c3762573 +size 50034 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..56b276a4ae1d3f1887903f439437a79153fb721d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c92639145cf0a3bbc4767fd57e7ff9be2d81db5dad34b35603c108b32687d539 +size 31641 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d573f8e329414348fa4c670e8c609758d5b3c77f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bcf6e9d626871a1433da4472db2f61d153cf0787fcb6b7c78e307f7dc74899a +size 145457 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c161fc73c96404d2c3aebd1fa09228cada3b07a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311b956e0dd1ace9bf183c05c436d0474d40c53ada30828d9ba68ad973583a81 +size 44520 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..902b15acc908a4323b9f7b1b0f19a415407fec33 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90bf48c57fe25b050cc10616fcbf71240e52472a308c2706e7ce46f4b28389c +size 54346 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f5784a380b209bd4efac2d3cf6553db2dc4a0073 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150341866bf6bcfc00d678d2c0d78aa5dfa0c2578f52b9846c4361314161c89a +size 92761 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29664f5764741da088ecd1ce0d46ccffd55ec753 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2fa98d06edcc1ba424fa79d7feb7dbd472993b7d2bd00439b6aa459be14914b +size 57181 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5dd3cfdb9ff5885419abb2f789e72882e57bd68 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8407602913c6c3fb5d83835c8cadf6dcd4837746c88edbf7878a461232b57fbd +size 58473 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..926ae98becc4c1806b40f5405197592839954aab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1560441ba041b68d184993a0c100a738c51d760e8e83421736808aed161929 +size 42872 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c4f9b8b0a260e7fe9e3dfb4569ddc8689a0ed97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcdd8350fabcc4c823c4bbe85467441d5dc9ef6efd24ba04aaa9dbc6b69aca21 +size 143813 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40454f80bc5da52302f3ec82ed6e3abe1141703c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4f8264e0a408fd734cf9d5c6fc7f0091386b198aaf2e1c4bcabc8de3de9bc8 +size 71242 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c7b944ba57bd77ed3cd3ee9942d994ff8829380 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa86c27396dea05a24498e4eed3f41ff07eef54b0d4302f661a73b7b5f47528 +size 161906 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c25b411fc962ee4c4ba1beddb29cfedf8daa4245 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8370b9098aad71ca634b93339bc8255c53eeb5149657fff97b24bf44d92aeb8 +size 209485 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..977a3e779bf83f94bedbca5400efe2613934ed49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2260a8a0e4fb5dc44fa2fb3f20d09636acfc9039abddaec06d8599523df6687d +size 49946 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..653acaf05b28f81ef1f9458bd43d99ad7d18af02 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:830076556732c1f0a325b9c7e0ba9a2a20df355b1f0e45a8968696c4befb529f +size 31570 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a341d22a4748786061202a74035182c3c9ba3ef2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e82e3dcc5370d004791cc9a4f3d9f345959b3940dbc44974b35a25db283ecd +size 36100 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4da6c55a8951ed9f727734a8deb6da9e5e79ae9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dec8bf842bb7f6c6599f8abc473781ded96c6c11fc8bf51cd5a1be106410936 +size 29485 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41b3025b1dbeb663d49068fd4bbe07a610293cf5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f027ba912671dae4e2358a337c08510db5bbfacb78a061d73784d4cb03e1afd +size 40340 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0706891287add61083ee0a5355ee23c7b989f6d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f71911d28de62075a8cbdb0aef9ba37d7e15220c9f50852fdf02903e15bc00bd +size 27045 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..341ef15d1fb7e8f1bfcdd77d55446967eea5bfcc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91d4dfc6c9a22f4b36fc261d4e9e6298d60d1cb47083bc0ab2ac68e753cd0d61 +size 21692 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63262afe155c19f5a6ce5e4abd8a86dd55fb10f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0fe0768353104768ccf8c3a17a4e9ddd7c93a94bae5aeb52cc49f4170ba065a +size 57365 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d31c3b7f619992982356181ac4798906109d9e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b213784407378964f4db95530f51e3709d42b0bba93ff5d6623c08da580686a4 +size 22294 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59059b4a2e6d861766d300f531598c8aeb8e804e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aeebf9d55af8c92105fb640d3f83f6817120a287aa7bf53ca70cb816b006983 +size 169102 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c3a2c40ab5362c3f1ce19b23e09b9f5881ecdaa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c2c04bf845e0d1f193b3a1628973266fe24cb2c081a48fa0e4056481ec44132 +size 90308 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a04ebc3fe4fb1b8c8ea1de62578df44ad97ffe9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:043418d77764e6df3743310cb05ad5676ff238ae3efe312e07192c03e1e10db7 +size 154116 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca6c7df6f5f4909328e028f9567363e247e961b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b72bba22afbac27757d94e09e1931d58ed28b8b5f56599446f62d40083fdc645 +size 79486 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e63a3955a5b794a5f9f36835de28768661877f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae76b5869bca51e853c2fa8b6d75b974e04d5ee109dada7caedd7fc6e618a1a3 +size 74378 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a82ed3bc7ba6d46c2d42a3ca94e0d4e558ae8461 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321f5e61841ab88605e305fcb1e577ec2e17250e1e1f958d5c62c5c870f26474 +size 82525 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c86a49be58f26eed74b905cd46faa5f7c3d3e3f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f7d86164d53c185fea4583cb62eee86b606175a8f158440b2cc1f664403072 +size 90007 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ee696e7cb86f4b49f253791d736977de29dc052 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a5727c144435dc1a79d30dde2225eb47127bc58d52caefd68257f0f7d8b7d14 +size 1005784 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c20af387bd32adb47a72ced0543b3d960362181d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458003d963c4253386674e62d24b102a94450b9f5e2608400097534686af65bd +size 133124 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..435af50a37eb5291f88c623bf25eb118c3da0bb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3808d16fac6f9baacb82bf707f826189a430110b12245e0c5b1dd5c72e70ce83 +size 183105 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..02972d42522b5977d885bd4f4b271c711b81c540 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfbc6909aae0f6805fc9299bbf382a0aec332416a448085707ecda421d70969f +size 27535 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e8a2698bfb70c8f5c16b5ac469d359dbb5926dec --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f22e5e8de318f22933a0236cd183d1aed29b1b1c3bd467fc38fcb8031725477 +size 120489 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92cfad8882c997be3f57c33897dd73cfc8d61815 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0a2bacb76105bf2c43d80c88cfca346aab53a40159fd961f1997eccc2734912 +size 58012 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d36df024b53b1746e3cc913e0f0aed36b70b2d90 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7ec4cd51bfc3930b50d81a20aaf7a4c4d069be154d9156e4432991e540a1d28 +size 25401 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0915c5b7391ceaea661cc456e9be43863348797f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9b1dc2d3c498fa779594b35b0e59f91d637cef7129ed86ec4ac43654bef7cf +size 39470 diff --git a/eval-results/mmlu/0/ckpt_342/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_342/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..296fbe0ddd0f02de9ee2390862ad446fe42b6970 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c4003e53bb42dd10c1fdc9c27a4c91e896830d00039c36212494613c01a6a7 +size 32929 diff --git a/eval-results/mmlu/0/ckpt_342/results.json.tar.gz b/eval-results/mmlu/0/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ebae9182728c44ee6cb3b3de8ad3332d82fe04e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19d2942df75419ca9cb27b4986fe0be8d98e6ce8bc9c32eb26455c2865d118e +size 7616 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b90ee3bb21a7cc308f99d3fa3534d4006c1db82 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7090ce0d14663fc017becd946bebc78ef544a538dd1e88c88e56dbcf897b62dd +size 17097 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e8e0b7ccd2a4a85a9e3e099a0e43af4bb62762f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615e2bc5db0f77671a1894a0decfc03cd2f9fef1ce0fa2fe6964a1ae5876f259 +size 29816 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53a36561bcd2d319e7dfe38c7d8808558e855201 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ed1589de8afed719554c8078c886379501cf7da03e3f2c7d4aad3e2905f587 +size 39840 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e7d6c41ada65cd784eba1143312ff42fcb6b645 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2195fe9c526b779d933fec43f2f5cd3e57d8a1f890f1380ae0c0469b49c0fe9d +size 26754 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7e9a108c23d7648004f5d3ba185f86df0b5836f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8755efb7a10b394ba30e5120c09002c60435f9a96998fc1abc7e473ada416bb +size 61252 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..613a91178f359e8c56f8055365b018306800b2fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1a25a4084e4f3da1fda6aaf0c65161d52bb269cdfab43c80cb055979bebe16 +size 40346 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..858bd9c631f9ec00136f189b62f5eb118d0593b1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a513bc51c8062aea8cc50f77de27475a6f6ec830729b7c876bf104f08a8348 +size 23809 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..919fe6295e782d435d17c5d7dde35336f3ac59b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f1b0ea625b38587e5ec5aac8acb2c69e16642cd96678bddeac8f102ab480b7 +size 31108 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e8ad243e92dda0fd510b7a6c0af79a76ae77be5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d5a0ac80932cf8f0227e7974c4ac87aaf97203c071b97f283c12a9fb297fbad +size 22940 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..530f07bcd895141fb03eb82cd91726b591eb34a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba48f782bea277fe96cfa312fcecbc27be771c31f64ba8e29ca83733e219232 +size 60953 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71509617529fea3d7cc8cd7218f14f7f59ba5a5d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7568afd024ab2212b65692fbc6b3ddc8d06f1e3f79376e43f28cea1d3007507c +size 25750 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe315f2e077568eb09fb9c6ef9d8acd1ade229a5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac3978e829bb7a8049ee6f46e0596df29a81f0c0aa817780edc26350e06002fd +size 25774 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24dc143425dc1c4e9401d8a678515b68404ba345 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861e4558d4bf06f7d3f482558b6c3900780f8c5ff042de97a10e64bc5f27c8a3 +size 46558 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7877979ef67670833d911f167481712da1039b7a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58028427f15b568983dfe47855954f2d0fa2ccb2ab3f613385547b45b42508d6 +size 31469 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fc8caa0fa412703bd3ffb000f921c255101403f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b75663c5fb14c96d3dfecfa7e57103e590cc7393c0b3bd48eaf7da2a486678d +size 28791 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3176d4413f755de0abd9123257f1a207e685ce6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c9f1d2dcce3b5c2f8e00acaa367c2aa665c560e47bd57c23752700784d0222 +size 74689 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d17d2519660a0c8041e9b2388861f56cedab9a80 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74aeff88d81cc4ff35c61f79ce68deae32e22c7fdf02817ce975b445f77c11ab +size 30101 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38d5e40e5f72a76b7891c4a807b3169068638545 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c7fed6748b9ecef5237b5b5be0352dafb742e226d6e67fa2a67135ec024a6e +size 19106 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0283298d57b395085d31a69199962d701d6de9db --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ab8f5d9090d8a12c0097904719a3e8fd71422e7db72c44f8bb1f54666eb6442 +size 87922 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7ca73faf0f8b44f6049d7c52e8ccbb39e397d53 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8919d22004533bba2969eb0d64fa76c7d29a5a0351f443129e0d76dd376af23 +size 50043 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6e4b52e0f55f30b6e4a195161404c1154e11a03 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cef3da4d56ab3c5597dea2a52b70d90b865ad9dfa812595072bb2962da29de74 +size 31613 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dce926184efc918b5fa5b4bc8935d21382ad7d48 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c608d33a9911749aa6e7f19f57f22d0106276bdb9e41fcc68422403ab23002 +size 145409 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8122d6094f20ea96dba30824408b3a7cd303bbe9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0237cfea597e26176941d211479f0acdb38f103b2b9a1c65f5e6f04195abce7 +size 44545 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c83a591b2e585ea303afedef5a32027a5a11c14f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd746c4c4a6d6f29cfdd5b19e0036cc685d9f88eee9fc438dbbe7f83084fef7 +size 54301 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7dded12f0a8d7a9752e7ea6ddd46f5e4c49c1b8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e21566b4a9cc1f5e4f088cffa30250d524aaddbffbe8f22f8d1a1f3758b32f30 +size 92780 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60f6e0ccf74dd4c10f23e81e3799896a6c3e3581 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d623199edb3b9cc6a9d773a8e56dd88d0a0928cb23fb5bda42061a5504102e2a +size 57098 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ac1d49adf953486c09a9ed21a4a13026ced5ae9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fbb1c1a8f2198ecb027d1f6d493e9d2fa010d6db5f9da0d33dd6b25d75fef92 +size 58456 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f972d37dcb53123b242345e2a943a5b31a8d5cf5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1fc6b6a3a331b872b4e8e6bce91e069e2b5e5a6019b7da6cb2cac583dffa87 +size 42865 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d23f2e7dbbb1b7b744b0c24d260954381055ce1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d468010aa5a891daf74a3f5f08dc1f244d76bdebd9c6b8bb9896008eac1a2fdd +size 143849 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6328e3b059d2e5a8a6c8ce4cebef5916a091bd9a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b5cd96fe42638bfa8044b4113ada907dfc4a830b0078a682e7cae575fac2948 +size 71179 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eb04c4f9189c9bb485f84e6c4472a8a2ae2253c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ba7185ef2cb1662db7f409b52f6a3b919639b6d3a208c17f2841d46e034fd65 +size 161978 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39aa909faa5e9187431966a3b6208be97e181024 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70caafdd51dd3134168fbb7fec7481cdf34fe78b0963e236c6b23f2918f85270 +size 209462 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b561ce8d5870c5d65a3b11573b3a6c3744f8ee42 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d098615a3f775999b4b6ed7b649a3adef13037a4b6c183b706c0ad50662a44ae +size 49927 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8267ea5585c12006a0187864ad24097441646483 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ffe72c01a732e68ec97f883641d58f16f25722aa7bffbaed661cfa8b0e85d7 +size 31537 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66d5174145c77b58380ce7fbb37b46010888e14c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:100f3754ea23eb0eb0d7c5ed39b546f54e7a05963986e2cef19a9c2a82aac6de +size 36061 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e432dc881ea88aa879a9723744893774434a06aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a126784ec6fad89b077055a9026ba7d94c06a015efe85e9a3793ea737c1c6b23 +size 29497 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2acd670bbacc74ffc52ac18b2260aecb40150200 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d20d201d7b004805798ef68b6a9217b9f0d5a9a991f4c51d2cf02d6e7d44a76b +size 40350 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1981bed5c7eb341e7450b1d17afaba18edefb06b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f901e0217f4732b9e14caafaca5296d7b9af815289c7ce69ce5d22ace6b73c +size 27026 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..51b5a0b8378ec188821e7b371e20bd03b2381861 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5964e79edab9b7cf984a7f390c2251d3e2221ad35a6b63ec273c654d04cd05 +size 21681 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90861dad042f05dbb5e6c53c1d41e01bf02ad54b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:886271455889cc733dd9a40b0404df90a4e7461809e7420db79ab9e46c88d0eb +size 57397 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..385b304b4f92574d43ea059d8dc62bd50baad2ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af7c7bf9ecad2eb5c0e065e4c515dbd0bc9f5b1d477551d7466e4666ec301b7 +size 22288 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2653b1e66247c3e81d0fd7790c32a885cbf02daa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adea69cf35c1e5c3fa2bdf03352c78eed6cf270d5926681e37d40fd5fc728a9 +size 169093 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58d5c3aa098da370c9361df69b69f03c26f0b4fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3219f461fb5098d3520297af2fd109b8958153710d30b0b428c021079c18367e +size 90284 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..13037dcc599fa588490b7c84a296e3d89e037340 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:415f85509604fcc580559873d6dc3913806167cfd4c56c29558d391d9a8e22ef +size 154082 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ba435ef061d465f3efd6e2951060b4ed6d2fd62 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef367bd23390e6f98e4dbd7eee3a04eb323f76de3e7536862df2f74190da908 +size 79447 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4835f01ac476918686d9d27dc895d2c53253eb18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f93dac0303c46af67d96d7ff5642bbf78ce78ce56baaf652c75d6a4b1b99109 +size 74379 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09e6ed9c43b9adcdc2295e0860f2a12b907ac6b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc65a238736720b10845fb6b90a522ae87bc6bb344bd8f3acb47e319c6070ce3 +size 82492 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d1614ba58bfc6a2434d2423154330af0549bb44 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:056423b356d8d16974463254ca071e92a27eb93f24f1a0c71d9d18d4162a79e7 +size 89921 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb65933b65a70aa3f337d853dae82762e2db77fe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e84680a94ef84b0bb8597f16d6117f5dd654ee115a72975b157d599261a9e8 +size 1005226 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..621d9c617745ddd90b28bdce223db1c9f59d718f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ed686f8d8daa075a086da3059b292ddddaf564bf1bfd6ee951936d713f65ec +size 133041 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d71d309660c41ecedee79e36312327cd91d7823 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e3691e2bf7900bb1b6ffe7b8b9ef541b22d85f6a2383250f22b6c163e0acea +size 183118 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f8ec950717eb0fc3d0ed7f0105e64bf01f8c9af --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aebb0050091b23f0344fb2469836caea69a58d9d23e5475d81f8b0e5159c1229 +size 27530 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48c3f7c9443b5ee562aff7f88b7668c195a11ec6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e3a6492249c36333c1a564e88a9caf96c9eca3c4515a1c46c29a682152f8c7 +size 120445 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e01e755b9d2ad0e939f1f83e307f8efb5512e14 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed32ee4e339272e747f719d7051054c2a7677fc8ee72748b501e20fc075bddd3 +size 58003 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2756a0040993361309362caf810c90dcc94d64f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a83013f880fde5883df35a0928a68c7b0410eeb4079d321ce2ff4a3dde1890f +size 25404 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..460e28a9bee588d34936396487f7be61d98c9641 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049dbd95473f47ffbc6e04a11dc1858b3f8377cc095f5ff700bdee72282fedfb +size 39486 diff --git a/eval-results/mmlu/0/ckpt_345/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_345/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f67e73c3a838d0f1fa0fb488ef2bfe29c55018aa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cd1c14f322047084470dc1aa750df851e2ab9cacb4201665fdab3b8be30b05 +size 32926 diff --git a/eval-results/mmlu/0/ckpt_345/results.json.tar.gz b/eval-results/mmlu/0/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd7d517063f22219a777e80dedea9799472a864f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68eb440f81b2f5fed3b3fa6c816357c2b1ec6c9dd9863dc870a572ffedb4862 +size 7630 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccfa410765cfaead77bdfc4d852f52c65e7bfafd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44173d9e3afb85ac44a08f14610652854b6b61b97592824b0a3266616463e3cb +size 17056 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..432712cec3972f002c63ca1193c7e991328b1e6d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:163d5f3faaae03edb18339e9e25e5cd95f440806ebe2b90b75c82a51810554e8 +size 29790 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8d1062b27eecd0c95e958514ca55a0e13eb53cf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61189303eb94d897f1e957c6e2d4fec1be2f75a15eea12755eb7b7796faae34a +size 39808 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5ab33873647042c25e16b33db4d2bb71ef8bbf8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e955918caba886a8e08531b0ca91f5fc2b4fd1a31bc6325f5b64891295a8f9de +size 26767 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cd879cce11b4a916da87f5365acf4b06b24aee0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c656426c00ac010df166398c3aad5b82caf03abdc339bd0cbde22c55a970f55 +size 61269 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61afdfb12bf8ded21dcdd76f4db38316080e3904 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87840217bc3ebdabee90c6f62434adf7336f566ddb6fa9c6ea46a4bac8208847 +size 40391 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b3b14bb86648f8f156a994f54d1cd22c4acbc85 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0fc0724920b8cc99c547320307a1aa39207fa9d186a20b125304d48a642953 +size 23819 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2e684ed5f373e840f2dcdd7edd4cd8cd8409cb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e3ee6fd1df1a974350be1b81f40a40460878fea96ee84c847b29118c83beead +size 31134 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c268eef05cc7c1d88446f59a5e94fd9007c81d24 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6199745a054e06c327ddbdc7efa4a5df11bf7482a68b645c99303135b91bab5b +size 22916 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8fce2182bff5314820f5826782f80b02dff393d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9254a5b47aca7112963a5cd288c8817530ded86c3579ad54741b805d2daecce1 +size 60909 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..61cf9e43baff94236a547207637da1e54dbb95c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfacda8510757ec1f2a9813cde020df5b85abb0a28ce3c91e8690a5a8c574a14 +size 25722 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e93ff1ba8ad2302337a3ab045dcc8413bf0ad07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534913817456710b76ec9815075ddc33d727ef94cf666dfb4be1af6bfd2918e4 +size 25785 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f546f05f1a020a29c77b2fc16338601e745934b3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf90b83057f44c18409cd9f5624269916e60c2ee49f58c9ef938b8780070abc +size 46539 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eafe2dfc96cda501ae22f79c45954eb3e0bbeff4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ac17620ea682eb252473c5e4bf2d35a4c3e272e68e1468ad610e31b0870fe4 +size 31505 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20ae8979387433c64fc4669c7258fddafeea7013 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:991f2e220477069f857a8cbae09910f98640ab483d72e769f874d987c0fd6465 +size 28790 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63c6d4edeca3223fa03ef7772f01b9f70eb06304 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545c12f5f80f047df35c07e1976f87a419023fcd982de332a3b48cfa0d40e20a +size 74762 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..162d5be7e8061cea18d7acf18088bd81efb9038d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e8e9ba705459ca3a9a6d73d9cecbe20de34a0be1250d1280eee06d971c2b97 +size 30177 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c63f7337f5e0941565ea49006fba97bc297c022 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb962022107b7577efcd01b36509f9b0613da1e18ff504c16f5919f9cbd38754 +size 19155 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..29f1bf9cdb444193a6b0ba2e5634426ece02f198 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70d3dd555cfc64db08eae6bfdb25c7f01e127c52d222303fa77e1bd8e9c004ad +size 87942 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..baee3e47953d76f3b6e0036cc240bd7a8c1a4476 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6af93a153a9823d73736279f8e1532e45f1e9220cb0e60467b7645e4828300 +size 50031 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad09ebe012f03e2424abb414cb0c40cc9197d920 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd9efb7a3d867ef0f36db845f49a9c0784c06c5fb8b010733396cf7e9fc5901 +size 31619 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7702134d1dcda154a6c1da488ab730ba801209f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c4ab97beee9559b7fe6efa4243b9e2d6c52a79a04dd42f12fea06f7e33cf143 +size 145408 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..706afaed733b632067ad63c9a9618e4fd1be426d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:344ca34645410ca752a245c3dedcd08eb7d0733b6dd1a21379b5a5efb31d24ef +size 44505 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbfbdea62e47ea8137ef65828b888a93215520ff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af1b106ce03a8582fddbe99f4def6d99d1a043cb186c0687ccc077fa80bd4dcf +size 54336 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f70738aaa858f7487e3c890f00b67ae1ea5365b6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b0d4b57eec5e1a3435f0a398cf6ca1dbbd8e45ee8d777855d0cbeb06879f1e +size 92778 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..46d98345610cb80f3f78284d2ea9e91d29e25ebc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a336e0ddb32eead37365a808dc81a9c5bbf9af31cce9a55f825f5879d59917c1 +size 57110 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a09ed31cf4121187596edc73f4dac55302b58ea4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3130f76a1eafea15275f6fef54e2922bdcc4106fa8b78a6e9158187371dffdb +size 58435 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8aa2613e00773b28dafb645310a85bd360409170 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5ade28135b29c766090d897864bb26215793411ab503ff08f227e3d5313a6e2 +size 42864 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9109741da2432b071d8c4d6b05711a661633f483 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b039dc8d4367c84f4b4f71deb912552e0fef9c6534f99d95b3efdbd42210238 +size 143756 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1286f9438840b5e39bfc36c04c3879ef6810b02b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fdbacca7662f2f69752732f4c7e2b92d09b68b7604f9f02b6b71ac0b6412032 +size 71244 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5faff0e6384d3601eca6a0330691f73c9d6a3d92 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db04b31aa25aad3c930ad26951941a3b771fc980182fdacaa27f8def0af456d +size 161933 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30eb4291e0a210b4c0132deb360efe9c5eaeaf0f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8ac6708690c2f464b907e29ea3af8776cf40fe6c74d13d1593d63ba601ac9a +size 209380 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..633df54c0a74a5f197410c8d0d8fb821a7786834 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bee043e1cad97fc9556b2f5970167448fd9fd24529a2b8744b83a18de56a10b5 +size 49909 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..606bf66f1841673590cf91df61d58fc455169c18 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6bea88a84aee99448d0de8e8ea3dcc3fe8f403f9b73dd1014bc773a58105619 +size 31525 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70fb1de6a23b26eda58544d140b96945e3060235 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a935606b787d38fc45f31af970aacb1ad815701f87e80e2a74f6b3d4592e964d +size 36082 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4433237c5b5c1a3a2e232ae488ef378d369c27d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b390d0f52ae9340f23dc7b6b4a7433f68227ec50e97638d333aee64800022084 +size 29500 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd1d29d892916298ba696aa7b2e681796692b873 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6b78975403e5607dab569bb06a3226bfb919310ed8aaf49a0c87c62fcbe75c +size 40346 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9ec8c783c22e4eeff610006c89ea56a4084ee91 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0663e648227c5eda52174306f1be14805bab73d3bbdaaca3568cadac0662baf0 +size 27082 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c982025366aab531cfd1b949898daada80c7dcd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17bacc67f5e2381665ff47fae953acc9016cbe3b9122b195aa1dd891a1167557 +size 21689 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3bb81fa430a00d7761a8280116d1fc6ba51f56e7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e1a8849e49522063cf5988ee9a985cb9e78b1d3b898cfc2651d33cfebd21c9 +size 57395 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b94924a849df71f8061834336503305f25222a4d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8836121c73fad6a27ed47b7de6fcb78cf0232dd9cc2fb22a938471201f4d1af4 +size 22272 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f2d87537f320dac36e542584a7383d5898a3e30 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1884f42fe35fc8cb8a38ea0d87474a64d2439e2ca4cdfa328380b74a6bc0d083 +size 169103 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d4cf918c000b5e3aa0c46570fb9bdbb7bd0598d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e77c37020daba581a3125224f9de7dfe090aba45f8021d49340e79f5fc4bce6 +size 90220 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8386c3c7309bc30647a8d5ec13dd487cee3e0bc4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3096b3ee3f2931d0e598e4ea01f4bbe23519be4d286bb7daa6053b166add012 +size 154048 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04cf0e8a4b01155f49c7ca9206e573cfef24d1cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c63df6b96e16afa1838195263e452e05131b4c58a25b56311076ba6cc54cf1a2 +size 79484 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..028a7e461da3fc96b1fc5f4d496723596b7dcf7d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200480a67e669dfb02f8cff5d9f4d71ca2dad443dbc8ed87042e0298aeb0d317 +size 74381 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7b14365a37a9a47bb27b5e432bea945f6baee85 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9848bec166a09e6f572bb5ec5e8bfbac08eca6d709b81d49d4976b9836b44f6d +size 82543 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d410e5ff448a333161ab907506cbc89e785a1d06 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e0b0ab37e15e2573872d699e6e9e8ab5f503746c309e20889aabd1ac2848d98 +size 89938 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86ca41db34fcb3783a03c50f8699ac791f26b2f1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555b5a82b6b57078e3fbc060925b80f05ef6ace6e481b0e5ae0b2b2e8ee900ed +size 1005274 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a71fb08d95dc6d17f151f32a3e0ee49a3b656a8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e58536d21fa4d135bb869f37c05a489693a0c7de7453c27b7dd1d4c6c5fcd3 +size 133060 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b74c455c4f4a04c454d076c8f98be32358e94ceb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41324d4d5dd057eb2a00c8d06ee9f8c558ffc2eefa10fb43ede5109fe5b3ec7 +size 183055 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a7a43e154d02a424632920205e682d7cacf618e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e091f7897d0e8cdbc50a54edcf77f3839aa42eefbdd0cf98088b34bd823c4e94 +size 27513 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b940a14ef2f72dafd0c91de0d776f09e693010a2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfce79e7163dc9d213ff8bccc8b62fadeb5a26f08a39078ca6ff5d181b80e449 +size 120427 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..283e0c6a54932a0e7e7e21245dd95f9395db7735 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a1512936a21598ed468209c4fdee329bb94c0ccdea431242bd9cfda07fa076 +size 57998 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16c200e70446e6c19673c3dcb2bcd9e8f6a9f6b4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d330417704c12bb74688896c901c5e87395ce901bc07a51db3cc0507eacee754 +size 25390 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79a6c68cda67e1721006cd0c0ed62b63aa9b026a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:489e7ce616f003873d7df22b9e62aa8c9bd2564aa6922731d95d13cb14c8946f +size 39441 diff --git a/eval-results/mmlu/0/ckpt_348/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_348/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2fc925fdc87545ec395111dc9420dda0a4545cc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819e71d7e9e3f6b7918c27da00fbeb7fc8f88e82cc6cbf8e375effa91defec7d +size 32903 diff --git a/eval-results/mmlu/0/ckpt_348/results.json.tar.gz b/eval-results/mmlu/0/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cad6a31ee1979520728cca122662e12dc7071a16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb9a8514824821aa8c73e887dbbd0ae7b7608e060db5d34a3800f4f996c823e +size 7612 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8d75c3147e29a79ceb2187e1ceb6bbc4ed635f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab8bb15cd6cb0f74ef4ac425c91970c1f6c3e98748ad4496ebcc2c57b1b9e89 +size 17091 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdd98e152ba3d6d74e92d4461b54e58c7f904f01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e296c5d59f9e60aab4d661e1e2e85d886f17828b19f00f4385db00a55cb1e9d0 +size 29836 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8468152f8ddeebabdf3d352fe4b36d969c514f55 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c3aa5df0b8a841c6a3447a6549f9fbbbaca48f8326cc469d4873b0b7951168 +size 39872 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2292aa6ac550993204c9e9a1ba61df3828a64998 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00eeefcc23b48aaab8318942da7fdb5909a063c08666377e550f38645e916fac +size 26777 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c51c683346bcc6a13908358288f394d2b9b17be4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5366bfbcd00edb1dd3753e7eeac819114e5513fe228304cd6a88a1a823044c6b +size 61228 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9a5dac23fd5a9c43be059affa1a196d54db5376 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ebfc4cd5c97294443075cc170679de62b402e28c178a7353150be021388e9ba +size 40375 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee35801d7db1574b74f62c23347e38781d2ebed5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7f74a5c50a11323719d7727677d22a8c1452bfd3a555ccbe8213308b4fbf98 +size 23803 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23e3bcc80e3f24e12cbc249fbbec386e1ef59362 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44f7d7d51776b751a9fe91797417b52a98985e2eff4d02cffe88dc7f1b2ff80 +size 31120 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86f89bd8629447028e094397d04c6201d511c739 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc4ce53400df06e1da6e9b983cd57f388b6e4d27cbf16d248a6cd6538888560 +size 22927 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdbdbcb5617f611b5f46e6f82faa5d23d82a3a09 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79ace05d623bd6999a338ee898cbf8f9718c1d0d9bad9c8d30a286dff179e08d +size 60895 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e26008d4d9b3e6befcf611e340ed2c9e43f781c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3bfc9304ecf7d1b5f1e52ffefdd004fbd98bea263bb9290517b798600b74f3f +size 25736 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..07f92495fb06aac330e7d2466cfb58ffb7976acf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5878766e72aeac2e9e44dd2d2b2838d1072e3239eae9f45cb2de1809b62db37 +size 25780 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e94057b18843184b4a77c9ae95007591e554bea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56b7300e956992f90ed699f7ad5eb493918c99a09842388399204e837ec81059 +size 46493 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c513aa0cca74576d63e864769075ba86db05461 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:009eeb6dbba18029df31d185be6c6c3d1c00652c34391fab1d801189c36e8e3d +size 31516 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d494928ee3b4958a4e5fb3db0f516e7769ade5e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c85d068ad1e1df6bd4942cf4290e347d2b83a009dc4f6f144149c38faf2012c +size 28776 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6334131a91fc4bde1f0a42f240fe51bfcdd84a61 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adc8d52a9435a5e3abcc386d5dc865545ef50b2daf663b5f1af3eb0a297dc199 +size 74728 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b8bf5699aaf3ff244e6f506a188dc498b612110 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd58367a8fc5f33654e2443a0fdc3a5e38dbd803682c268e0eadce48fd9528c3 +size 30139 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc8a90568f7e8ebf51902e663a88e771b3718446 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e7b1623f4031f6c6c2aa31b162d9b7546d336655ab4c0658db254294d8a9de +size 19126 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76dea4524dc9cee4627eea0965f04e82aa59b039 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0ae69565561ea12d931220ca856ca152859ffdb99ae89409ae18a2e4bdbd1c +size 87928 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cceac05f1b7afcae7aa42319703fb7bebb415bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfafa5cb1a586774ed7a8f9abc0d217da8507ba460bf6e3d7a9341d578ef225b +size 50002 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d80b07782b31ec08c8c0a5d5116fc4485fe9b93b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df80a03e362d1e7c31c3721b1a730afc159371f5952e1d23b7cae4ec01b4dba9 +size 31611 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4a774d9804ea95f3db8a838ca2654ee2e403ad9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce4edf1bed7624b9bc84db8f7414ae6e1e7758e477af7ab6d39d4efc8c161427 +size 145357 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6858eee41377a05bb413721a9ba07625dfe35a6f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b2974bb74977ce98f60ce43afcde6c48579af4b132fb182d3b0cf4d937e473 +size 44570 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67e47829d15c478d34eafea5161716258dba154a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327cb2a39ce6e7be53cdeb8898efb7ee2048acb4df92b55455aa2fc97c24dfbd +size 54339 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f20301f1c78af670498a9e2dfaa1a0a2de5d2303 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d237a281f6a60824015fed8b58755f8c1ca12f68245355591b4fdb8aa57d65 +size 92857 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abc542558edcc26e08bf8333665874e0337fe451 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3686f475472f6719b7592cbd990d06ed93a4bd9f1e8218098cef0934c7d9c28e +size 57103 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f76f51bef6ca028a67d5ee53d0a45a4618104e9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e9d4381e3b78d2716ff4c765e73905195bcfbdd32b34aa75149c079f094632 +size 58531 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c06209ecb4a5abfe1966e463561ccee11c4c5ce3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b12e216dd5c2ece4840f07ae63c797837d5ae44651969b5f610315a6c17253 +size 42883 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00c1e1193ca2b2f39f2df3e20a8e2eb54dfbfdb1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f6935b16aff9d9113276a363b70881e6d52143e4b596cd4e8d29d6a9c88162 +size 143869 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f414d9048cce2c5c643d5a886d2a20539c53f081 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:276239dc3e3bee959970f59732efa69adbf4a2b4a840540d29bdb7b7d23e0756 +size 71199 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d526765da312791e84f302b490bafe2ac7ec3f52 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894764e622e7c4ed7be42e57f378a375e366705b0df6212db24a174f9942638f +size 161883 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac3706fd3da96914781e9504fcfc2bf58861ceda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f9e7e01b568764657281283e289dd31219779236e2739527627ec4a341ef34 +size 209421 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41a2cd919cc47e463d79fe76401004e13c6a588e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8297696ab66d209fdbc3d7c624abc46e51128b5a6d170bcc729c4769b9d32d5 +size 49907 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfde726ba5aa7091716a4d671bb2365d8beb2956 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25bd0a84791a121ad10c6dcaaed4723be8e5881e40111185fd1389ebfa9f4d5 +size 31508 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..883821f4762fccb65494f092ade6c2ac62c768ac --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0903b08985f74b4854458894f9fe63de678a0b3ba2876beff650cb484ae191a7 +size 36059 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cfa521b089f6a1557ec785a2be73aa0a3cfaf68 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ae940fd8420600c30a03cdbdec9b9e118cb1707987d1d41fababbc2a069b41 +size 29488 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15c0f593738900bd1f5c58984297fc41dda51bd3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:391e82b2fd8e4f8408defcca5a266dd3b08420a847094bafbd895ba598610abc +size 40346 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ecb817fb4bc7144e390d0df67955cd1e6eaca55 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e774b4de4e93832fa0727ae348a815aca85ec363b24e3cfa515293d02cded6e0 +size 27046 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c507cd67a82dc8f290ecef36fc18cae5f481752 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2283a6f7d9116cc8215665833dfc2b6a2185137d8bea25195b72e4eae113a456 +size 21708 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c4e4fb7aeae1fa2eeb1927b0671d77abc461e43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af49a9e9f0d9fc47d83a3797daad52ff67d7f6edef056f273e2c2faa3f98dcc3 +size 57396 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbe75958623a85a6461110501b9d20233252a690 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c39183dc23414e28260b4be6ca549b91878d2c127abce7404989dda38de31f33 +size 22271 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf7af5bb964236e9e058ce20e09743575f1817a3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca9927d976be08a596801c2bdbb7c537b8fb88c7127923ccab09681fd2b473ae +size 169066 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..801d26cf4ed00835430aff5e865437817e74eca4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:361a290dc59acc5d8d1d126c3c280b038c2c6d3860f93efb155b4f60edf3f384 +size 90272 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aea650c1937832f2836484c0b8d6eeaf3aab314e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd7bbc333b08aa763ceb3c7a5a460a3b26580a271dc05bafddd07d06a224548 +size 153984 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66fa81e2be1a1db5bee12dde3a0292b80cef5fe3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7603eb6047708e41a0d6439062277f67a115d4a1fdcc36144f338a0175f801 +size 79449 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e921d19d5ca1e040b1a7979b45bba31c7d52c244 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3b0abb0ef75e101b464ef8c0b37df64bde60ae7b7658052d7c422cb75d8df6 +size 74385 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..630b4134b36fe1337c5f53a0446ebd87e41102e1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86dd140d5d2feda894535f164fe1477c42284eec543b03cbb8622488e77f7a03 +size 82587 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bf6bea5faef9e0ea95acbf9079f01ba2469624e3 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462563aacbf616da1eaaccc8523133ee6f633275f206ac08d82eb2a787929e17 +size 89901 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a298fb7ab19ae665aa92a5ddf648d7b507449a0b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b00a7eefaea1f39b3dd46377eab378450236a7ee9f2d0383f5b2dc056ac40fd +size 1005350 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e356974deb535c8a10f8f0b4631b68047096239b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5afbc5cf8d0ffd1aa7d6e41a65564af2271062068164498457a5e452d8acc8d +size 133068 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..711115f222ee77d449d6e65e9b59969b7e521dda --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41269f031df834c77e71bda5e6d31229f7cbb07e60674ba87ebf30d279bb9654 +size 183137 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6535b843a13a60303b25be529841c550bcd0ff3b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306d38127e40cef21180ff17e652524ad2e0503d16e823abc873411b5be47929 +size 27523 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca3ea91b25eec3185a28ed71239bc1944ba51806 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b8950dc2954bcbe557e0326d9ee721fa36732cae3573c4a73c1f887d61d7ff7 +size 120424 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3f9665b6924a16a8875db21aea53ad31c02dc01 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33333a5d0a3eebe07c49919c379ed0f3df202cd133314c42c013271ef14d0e04 +size 58021 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3e0413b6fc312b67df2ae1c9e040b4a1574fa16 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29c50cc24ea1834e2b5b119f96a35843e032659921baceaa88e0b9b0c13c863e +size 25409 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9b69382561abd08ba29734a7aff5be156ac7112 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba5b6c0769309b621e080888311632c61344ac91e5da6dac1f9648a267a021c +size 39463 diff --git a/eval-results/mmlu/0/ckpt_351/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_351/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36f99bd7cb4b92b25f2c6d196550704a23bd1a9d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc8b595a540ec09291640ef66015ad770032e5c30940fa25a2555aa5723bc52 +size 32907 diff --git a/eval-results/mmlu/0/ckpt_351/results.json.tar.gz b/eval-results/mmlu/0/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0744ca7919186f107769e48da8b472dcd084a744 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b1c0a857d1553a28edd58c68c51924156e032d0ed90493675e911b823a540b2 +size 7619 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f913e53336fec2a7ced1e686d6d61610ea110afb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16685d63697a2f8a448c62749c0c8f6bdac27eab0f4377966055bea668ce9c3 +size 17112 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ba2fcf6ce8786b3107facf058035bf749dc7478 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97038488ed5172b57aa15086f43e222d03fac81c9e9b8d7ed1d60b9d7b176a9e +size 29819 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d704bed0d7ad2474bab59c56386722311c8b368 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bbe75443690309fc5f0a0d2ecc4b6aefe22d3cd9ad8ef5c8677d7a8cc7da42 +size 39832 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0af2dfdca679263d6280df94b91edfa4d14b8c43 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f6d1ed19d83f26309a195bf819630a26e3f07bc0e2e2278129b0f675e415fe +size 26769 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ade844cb04b1353d907b513b8bb38ee032b11f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06697dd49aea99e4a3476aee66d64b48a1a988d60dbbc2f427f2a72503c6b39a +size 61245 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..128b5eb8a0d8686bcdf1ce597e417ac52f80a94d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39fbdf129f368a327184078afa13e8ca65e64cea7369ae2fcab2d28f0f664e5e +size 40375 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09433d191cf21dc7b7b95e913d0adc13b34664f2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1c73dce7ebd8d7cfee0ea6cb76600e072faa866d450d5d9115a48967307577 +size 23825 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7459e51690b7b2f65f22e373cc80b7960a4852ab --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7198ea362cfd502a1d37800c36e670c6caab4ca14300871b2c3807bb59e1262 +size 31132 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a58a6e16c5acaea9643cd5a36f7c5ead65ac3b4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95abb9661ed6d78f77f59d04baf143234dccb5dec6cc7d81457cfa2d53a37116 +size 22949 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df413adbd5494ed16e7d9a9b55257d1f4f6aa9f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d94ea150bf923eafe93e9970b594969afa0b41189ca82627c4643f25fb6083 +size 60934 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7965e9340adcaf5413f8a0f7ca8fd9607cadb20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02e3b597b7d0afbb82c47e6504eabe6ca0fd0c6629fefcd2e4351c435cc15f6 +size 25753 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d092b6fe4101210499ce2775ebccb3352d910b2a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0040891c3ed960e2dcb603c13e257df36dca906a79c02b8da909ee86b00e930d +size 25719 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..573e01c238ed61ee6cf0686e2d72d4025a04a681 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e97993e0b7e953ba134525155bc97b81de1bdb2e0e080263e7f29fd1b41d10e +size 46498 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4ff1904b118bdc269e21971dd3cdd37191988a4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d891928a6f9f354b6bfa880e438d5e15526dd2f6b2be3648e6b3cb2ccab5c7c6 +size 31518 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed57e60a6234f94cb3e2246aba619039960b59ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4c29e143f123f3b10e24fe38a87a24b7e8ccc32366e1c4ab7343ba4d0c42cc +size 28831 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..089d080317c0b83303b567a100696fbc48bd7a82 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741192034d7e2580ca01fee284234692a8806d84d307fe1e44cf5c286b0bf33f +size 74808 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cec276de259d55135b13e1fdd7e16b263c531ce --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caecea97345ee06ac1bdcab77524a262f7ca6ec45893200c10ac3f4f582aad74 +size 30159 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..675d623d7b21b1d08c01f07009062420a193a5ea --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28de50c901e6361c69cf0abafffa545d8fd5f6870035511ffd379b12b56c66e6 +size 19162 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c5aca6e45bb54b0e189afb5978b2d30e91744d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99cfaf4b30f6470ee49b1d6ca941f8078305bba0d1cb08f0ff12187e88b72297 +size 87919 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb8bce6e08c1cfd44d9490de837d52c3c148a691 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa524adea0eed2130704805a71725e37ee295ec82f2ef184036cfaf76ace3507 +size 50056 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ef3972cefaffe8b7302fb8b1f6d7a73cbd440cd1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9033fc59f3d669fee3360b2f3421a8de3f1502116c4120bb40cbcb25ab09da6 +size 31615 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16c1d2eeb9128869f3d2cdcceffc98ebdace84a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:034fdca6944f42ffb8b6320afbfc021b3dadc94487bcc5effed50b2cb7dbb242 +size 145384 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9516a0c72d539c17c8be2e7e50a37337c6057cca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8267af7600d9ec492b2db7776e4e2a02059440946e0df8c94f0558ea7be1787 +size 44562 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72faca06adb3f7fe5e4ea41bf03f3a7f9de291d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087204ceadfc14acddfc18a3e8d4dfa2382d4eb12d2cf003a0b842beb5cc8b13 +size 54321 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f10006f130f449e4d22ac6c478fcff9ae91d792d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26917fb2dce1b66823a6128dbc013f21a26832fdf15f46540896dd3b31774a45 +size 92801 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18c1506e110f00431649e2895b20c9b1fbfef16e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b894eb22b7471a946f6e677b3a178ea20ff4422d9d1668873d909201bdcafce9 +size 57145 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b81b23d22e55fd28c4b9fde9c3d00c59a509dd2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1933e0964f7d2359a163b0ab1516db0b716af75f9fd72b0673932bfb86582a8e +size 58447 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9b4290f9b9f47b53e1a548fae585429130edc5b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab35b76303e1ee746deeb52e3d33b3f958704e67c9d452462b0cfa6da003af3 +size 42859 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cba91bedbc09cbcfd3a20b6a9cc8ac4e84ac85f4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7184bb74988b8de8d229fa9ad9e5b0c163e4ea2fcecca6db3a36446122202a28 +size 143799 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99bda1a09d7af43fcb22e0e29045992c5f11653b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7aa56fab7103e843d588378fb8ee0f606356bfe93ec439ea1b84b6a14b981f +size 71237 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..111ad86857953b3ea65ffa1186bb7fe6cc9415b5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6923e2f290692bf5e9230123c62bfce90d8c4a6e394d91e50765135a6e59da +size 161919 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..de28702dd56fd0efc74aeb2d1308ac8636deb99c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302398167ce4343d801555c785cee5c2ab485ba675f0228d47c3eca83395cf89 +size 209393 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4cf4d3f3059f3dc8b8570e474946f24c380f4a2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5988a83078dda95e544615c7fcc4d6ca9f7066b99d5f8202d0c51c25a9b20efb +size 49894 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84a85b2e41e1fe96f973712c7cb5d6e0fdf9d2d0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c090b4d53c0bf9d437093456a3c4db8e40ede285bad85e9ddc9bd333258ce3 +size 31545 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..329c2f76f28f8bf0268db3091f2816676a3d0a50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:569a9904f492fbc9bad799b956d0755a151b151941784d8e9631d7e1d1cde696 +size 36081 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..568de73dbb287cbb139e6904aca39a9a1924f1e6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c5afa753d013704987990dcc08b414ee17787a90b381b7e95996345567d393 +size 29474 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..823325f0489f328ee9493836493defbb79df204d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1236e68b6cfa7b20eeb3736a33b83c1b5415ed172a0f25d743c8c3a2dc64f44 +size 40334 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0994800ef696b5000d99feab872f104e8a131d8e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60368e86a8019c5fde4ff481c15d4c15e581ec4d2f084c401e32b347103d7fed +size 27090 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..af8498f44377f47c3fa46aed61b520a92ea9babe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2354a4ae243f46eddaae1139aae38af473efb35dbf8d5e4eb5e985a70685e8f3 +size 21679 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fd647d955db74dff76ea161eaba80d7cde5fcca --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee75422716c6d36ace42358e57811cf2e5d318fe8cb3ba8e6e1bf15a79156c07 +size 57370 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..645ed24730d22251563d28d2dce4a0fce1ed3315 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d8ca9c0ee306e8a115d0830b8bd84ce4595b6bec30c6ed5b65ada431e3ab4b +size 22284 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..60602def879757b16278ef6e4d16e9dfddbb9bd0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7eed6b4b5ce77e6cf29ac7795d893b1ba8232b2ea19294b4ce3304491cda32f +size 169121 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3a226b1f7fe645db4b37874d5d0db3c1cdb2007 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b290dff07ba9dfce8909d0489ae3e2044a4d82ae99835ccf6c53630e4aebcce +size 90247 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90c88323cafe3abf6083f8d45dec29777c7a9b07 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d4cbdff810db3b043decb9504c8c4d8874eac2289f656c553ce99ace13c7e2 +size 154192 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2000713f076ad343cab600e828870f7bb3e5fbff --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f73b03f597a26e2545e92bf12bb2e66531d2f9ec246d556114d90706429aeb5 +size 79434 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba0dfa0b7d946c9b9335ec65985465e1591a63a8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b0580c68940be8a3ec3d28047d69d8d48128fd9722a52df6665a53f1ba4643 +size 74363 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5155c7dacd84b35debf03c788985923fefdbe90b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10882a214a8c0c074470b1919ae0aeb280447a767a9ff9f322789ab561c9570 +size 82530 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f357690f70b37cf20e97dadc36233c7f17422a2c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e0eed46b84374b75186f5171b662dac5b745256a696203efeaa8573617f7c7 +size 89917 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..682cf2647f4e57daaf14807554919a3fa619f46d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7e9e25a26a7e00d3d46c3a59ea21ad908053e41dbcd837f4e29fd249a4d405c +size 1005357 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8e814923dc036ef13794aa2ad552748ae5b65c7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eda1593b725d48a8d68f5e6130dcf7245baef3c743bd48b95c58ddcc26b6f9b9 +size 133090 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a8be6ba59f709aafa46951bad02060400e2f454 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:595959d0de2ebbd17ae7abf4f89a5afe59bbfbcb58c3c1edc9e32aec04a8bc1b +size 183054 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a82b79083823194102a09eddcb1e85786921e36 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9d917c6eb46d7f208c108115ffd615cb76bc5cc6b9df49f328ddb13d37eca2 +size 27513 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e372b76e204608c9d71af756595efa293528af0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f853c838ca10d200e43e7d410a95953c7b261d5be3a36548a20771fa6eda95a2 +size 120362 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..448413f4fcfd2ba88f8412487faad79f0c1f7ad0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f4f35825116a1167da4765738eaf6358720db2e59538e87b0ada0896492927 +size 57983 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6193ab9987f21f292a8a0122ee6c856e886beee1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f7b7a89b8602356ff225c6387f5b8580762d7d4c88bd0f45ade7dba1acc9ec +size 25399 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..703494dc465a424633b33e8212e351619b19d88d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88042dafd06148c041ecb7d196bd07adedf0c8c6e338ac45e221efb2db93b454 +size 39445 diff --git a/eval-results/mmlu/0/ckpt_354/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_354/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb253480d2dfd4ada41e625b670bb55762b37a20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e1e1df3bdc8bb9f62cd9f53777b871ce968f04334a9e410f84bd48263be48b +size 32924 diff --git a/eval-results/mmlu/0/ckpt_354/results.json.tar.gz b/eval-results/mmlu/0/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4f09a1b375734f2fdf5b2e92557e0d8aa6e8c8f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0e6c98434d3deeb5c0c8dce82906685467f8a049adcde6842c9006c2f0e6602 +size 7644 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d609b3cf0fd5fed9a705e92f583be0016a1b9f6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e7ab7bb78388cbc65421695bd92d6ec42dda00798c7851765d206ffe1d318d5 +size 17094 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d4f028f37bb97f192fac0cbfd75581c35b1fd272 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c11d608e568a226e755640db8597a5a30f28ceb44d367465355266e6d4dd38 +size 29854 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d834f711b2acd392c92378f86f02ee28e7c733d4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0001fb1e99b9b48c77db945fc5c699d9f84eb1159fa031a0cff0f21d3f4da6 +size 39864 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b72aac422507f1b271f646c62182ad840bd9530b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7c3cc134c8db35847fc0b2409f7efb565e386a651d20ffd814d2f41d39f667 +size 26759 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6f06b553b1d1bfefb0cbc5dafb5eadf2c046346 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a587fa50ec59d4cc1dd8b496041e6fd09bf2862dfc563f121dafeb50d1420df9 +size 61262 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..938196685db5178c1c478f05ee72ce1c45d23920 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980866255f8f7be0acc1083a503a982dbaf8136098d4dce59f3af53735acf271 +size 40388 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41fade7a77d7795dd2a15338d0db81b5bf44c130 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5700f1f7bce24a6bf32cd7322a3fae70fd58bde6ccfb3a931b46e15b27d41fe +size 23803 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2df99a786da2c3b82ae1f5cf596d9414dd9dd08 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fd2ac42e692728e3eb47898cfc57eaa0b9cf3809814226d67624a33487591f7 +size 31164 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..386c764b6dc91f3b98e084c948b6a10351624cc6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4266dbde115353928e0c7e02e506590c5540aba10e10659894600c8bd5bf9e6b +size 22961 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c96e6ecb7d53ac87616b81c99c8811e928a83942 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e441563536f53b332601b604a0c8bd0b40bbbbbe88dffc2236aa1037bad7907 +size 60943 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c48aa6069f0a4dd5a1eef5d1bd34e0c2d1b37c2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622394c955e95a630326193a0b4ee6be416416f058a5a533c7a29ad074ed2880 +size 25760 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d6768e8ce03fae4adfe9aca4037b92ded9aed50 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b0247ffef02c285ac51bb804ac3b575704314b28044077e7fd2333126aa326c +size 25776 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdf18f57b222de554133d1ff583f85eeaf36386c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f68dc2c0598107c31aae87d63f43b43ba9b335826e394034f3a6244e2a4367 +size 46528 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..779a19e216c5231c218d5b5840452ed1bc42eaa9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471b6de3d9c6b85d76de6b9b76b3b292bd2be6eadd6ef3f99c9bd64f51140476 +size 31548 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ace4030f232862b0200c0faaa66a0f0b08410b9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165e3f155e6a9f459e8855e271cc4e31e4990774877388bb09b5bd7b120943c2 +size 28766 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4d13ea03325435d76817b05989c81c319f58060 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4c62aa7244c9e155b0a37f5400237f4b3daeac02f20b04a8a8fe60a1308eeb +size 74763 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d862ebbb116b5aa5705a25369a29f9dd07a53dde --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a5f9d9a273f6eac9afbe12241a7f099cdedc3da465276461d609dfc2d0b147 +size 30148 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e72de7df7382cb4351ede0413afefbc31183cc81 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3fd73be4429d46dc01d80601bcab731db2a96b0dbed7aa1832d4ac3d3ee5d64 +size 19149 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc1aed203e7f444de463b59a98a23d854986751b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa1aadcfdce5f74da1890995efaf707dbcaa62e6bb0f662a11d7a686000be64 +size 87869 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..952e425364899f603caf2591de0b92f0f8842480 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13262f53afb6b507f826f4d166b6809f318f79a25e6fe1df4ed00038e2263446 +size 49999 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ecee8004bfed1d4e8569c1236575d7f7de566bbc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004a5a8e1e23337cbe689f806715b97ce16b0742c015396ab5ae87865077c1e7 +size 31616 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..042f833e1072d160922b24dd5875de8dc595a420 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7966ba8e94e023fe8b721c7ce008464fcf549f86a45b53c4b49b8f9d26f693b0 +size 145486 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9f4e4e41d4dc2a1f480f9a436aebf3337051815 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7473cbbb00f3b9edfda156b13183ae2116c4d9f1cb1092179af4e46e550a4502 +size 44566 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb1b3589476b28b1b0985ce38e54afd0a16714f5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ffa9a537136677e295cf7be5c5fd7f1d71462e1abac6cae106352099d62263 +size 54355 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8382bb1013be1f57543c839452a22e53052187bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b441ae3a24e512be4d7e51b263ad2cdf4e67fc37e028d60c9ec06a9ccb861039 +size 92826 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..95c46f29253f523119516ce6e4d701ad6ce64fb6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e256ca2921ed3e6e34000741a78f90cf0bb1cd4abb4bf5e60e48756cb8b734 +size 57172 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c3fe424ebb5a25354a8f2830e05596eec8040742 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e75bfca46aeb144948028858c0aab13334fa35ef961d8c415f41064060d28f0 +size 58477 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffeef7d08d79db14c88524fb0273828edcbbe809 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec3d025b50e0b5058cb67f3fd3b47ae4f42ba17ba77c55569114cc8135e260f +size 42908 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3ab29f6606f1bb355fbffaa84b1037632e6f650 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c15df300ea7c86fe56c417272980bc9a95740c051141b7bb58267c85d82e36 +size 143765 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86516723705e815a965179b62457072f0805c4c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb62417579cdd4914c5f2c56783846646d5fe0c01620bddc1bca8d1e28df905f +size 71221 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..00ce042f79420913d3cc939d827e24473e36f5a6 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f554902455c42eb4b40b7fb225110cb899e6033ef367b43e3cf071bb8743361 +size 161906 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..124a551ea03bf87f9df3b531de71f68b92162ba1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4175a66b73821657eda5b159e24b385e51e5e22edf196b5ff4b8313db6e4590 +size 209521 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..160581f00388a98a05d31bf365c67a14dbc706b7 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26bbde4c85ed1d62fa12a468a9f0718f73a8f0ff8a8b3c726f594eb1f2e08921 +size 49881 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83c3c3de989b5264346bba9186acd81038e9be00 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:045584f57817a7a892fd60308b5677191c0f42f5db9291f30dd16946a5daf36e +size 31529 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d80cce8e08b385391ddf961084fc3d8b7f345128 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f9e5ce1240fbc088cde78b44afbacdda76df12159a1c07b7e9ed4af39b6391 +size 36060 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67042f26ca6d19cdecbf2255446792ab42542bfe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68a482a24bd37931f0cba15cc6fb8bf8a327c3882ad7b818ec069b653823955 +size 29486 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a4183eca93a30a7ed83ba094d5505a05d58bc5c0 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c42b23ac7a0b27018e7fa175034735d0a133a3248acb268aee97a0986f0556 +size 40376 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2404a348cad444a6e99743bedc96ba3fb24a753f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584abe85f5bdfbde0bbc696dcc5857f5e3b65688a7fea1bc1e200d8fed99dacc +size 27091 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9504ce4b09bf9149d4d577a13c3633e8cde82aad --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46a5b260cd939d7cde4be2b59077e3efd6661a1289550fe0061b132069db3a86 +size 21676 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d04297c511c4f8efc2c3514136aa0577a2f7f21 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:586c027943fac3680f4c2af9054dc1eed7cd4eb212146f017b35bf0f8c0de119 +size 57396 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9505b67f9c7e02ecf64b98e8df791666fb28db58 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:239ffd99b033a0e106e749feede2c18973b6971f13b52558f2d674c1ad812d33 +size 22325 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a6397e2932c5233753a87fc803fa95322330edd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35985b17d1151b3c27a82d843c236d0d031bd6bf41a65b5b55a966a924abf724 +size 169071 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4656c813f6692b65b090b2899489d9961125243e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a261eaa61e517582fa29470025bdee12af0c5910bb5ba1f5e894ca484e9c04 +size 90287 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b2c179fd7a7bcba64cc793764e7f1861a433248a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:263e2c2e979d96f05ee7b53eef2eff2a0b57f57cc2ed3de8dadab233f3c744a5 +size 154185 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..372f7eba3751132ced39e1f626ac41818365d714 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b1439068baa8decf11b5d9409451dc982ac42ec71f7ad803130c053971bc0e5 +size 79483 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3eb89de87fe533a610b5024c62626a80a152183e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694c8f0296b775c2b808cd291a94c17a242d08209cf2ac70b64a86782af05ba7 +size 74387 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f1b983ebfc2cde8291824478cdff6b5f732153b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb64a206fd698f1064d3d6be439ccb40a0b2069942be0e362e1eebe7449640c +size 82511 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae5426e1bdb5802c154fac1eb9e7bc0de52ed600 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0bbd069faac1723e3a4f557a4925cdf7ad6d77a64a764ad636f3d15e76a108b +size 90005 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54c2ffc2a2fef7463701f9283b0e232b57b130d8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e738fa608f3456507391258dee54b595788339d718fb69344cb2e177b4f9d4 +size 1005609 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..952fcf24c33ec4a6c8025a707d104d05aa56b11c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c7ac375631af5b7e6b8e1d6b584b439b38b7d3f2f4a42856828fe67d4eccf1 +size 133188 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac70d8807cc8bf40ce651f77e4c73f2ca47836fa --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9551d4cabc40d7f254f5e504449dd48e2563eb2d8d71f198d04f2650556cf6a +size 183079 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe5681bbb053d491a9786498487979d008a73c27 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd01fd722df391902daea81a21cba4e3e78aeca03cbd0f7abdeaf43e65259db8 +size 27540 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f035aaf76ce4841d8a3fa8c77878a834d3d76584 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0f6b8154fe901a9f64b383e4c96d3a3ee864e943a3cce6a9d09fe12c6dc7bcd +size 120452 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..59e160ca5b9701cb8e0d651ef5d558929f94f79e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea97716d87168aca97fbc159dc91ac46f0def2b9e6a8cda9ecdec3a27bd51fe4 +size 57967 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..500188f493f3d16ec4cf664a9d8933a267b69ad2 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c018c1feefb8939ce0d9c7a170647c0314d36fe63b3a96329603f8d0d6da0a +size 25400 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9827168ea315dbae44764e7add08a09e05e65ef1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e4ce18cf0473c02c9c87e94f2a1c2fd7844a6a78ff060177babe45d7f6c096 +size 39453 diff --git a/eval-results/mmlu/0/ckpt_357/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_357/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa9501bb569953436a65f13209e82ac744276aef --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffaec3c32fd429d0e64405a6ca942516f82b4ded8b519b27d1917dac0622e1a9 +size 32931 diff --git a/eval-results/mmlu/0/ckpt_357/results.json.tar.gz b/eval-results/mmlu/0/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fa36ec98f2af465549e04252b06535c2a4b6f4b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f69050df60eac402fcdb6eae571692df1b401a7e1b7cb575cd545c585643dd40 +size 7628 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_abstract_algebra.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_abstract_algebra.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0e183a7c498f70d773c095cd89d8b97110cfb4e8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_abstract_algebra.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1286c1b43575a6127524686ce830d1313a7d8c651b4fdcaee5134ac2273317d4 +size 17103 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_anatomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_anatomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed40e6b07b68b4903b32f2b2bda1de14034b393b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_anatomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7ad0edde9755a6a4de91d869259d33bee756094bc599f4d53e26bb23691a89 +size 29831 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_astronomy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_astronomy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..324d7e76dda425b630177b42870594bc4dbea9bf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_astronomy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc5a300adf0af87224e6c97a0c5bebd17f131bdd383c1b4a52bbacf6a665e830 +size 39805 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_business_ethics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_business_ethics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c9a0b704fff5e63d2fe69d3fa1424ccfe6a39cb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_business_ethics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde9887d124cda1d5713d976a87d8fb2e02c311e71c724d4578a4aa3c5d1d5c3 +size 26788 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_clinical_knowledge.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_clinical_knowledge.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fca6ff0abf745f584f589e350ab64fcacdd2cfe --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_clinical_knowledge.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96e44bb9b4c14c7af86e9a7a7ecdceaa46fd8ab460c82ae922c16816f31e61c5 +size 61219 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_college_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_college_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8598c1bec23796d06eb365d99f9b1ccca7e9efc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_college_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50dd1a7b2f567eba87b5681a8beea3916fa0fc822ca63d07e84ea7b787e41222 +size 40393 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_college_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_college_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15afcd7e8418fcf34eef8e2fdc304780ebd48501 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_college_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d77535ca1c17bdf93b90fa4c139d41f16bcd7c007caed3778bbba84183aeb4d +size 23815 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_college_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_college_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ce5587978e953dbe9032cf0432bdb7c9b3b1a4e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_college_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7581d585ab9e7dcd1ec9abb2c1a36b94cea631badf05708e6773abc986c27437 +size 31166 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_college_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_college_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf906044874fc5c1d4399ba44cfcf7ad4444b869 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_college_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb30faa6d6e28962eee193e699b5146f8aa9d05140ead68bb78ceba6c11edc4 +size 22950 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_college_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_college_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b1b497b81e16bad5edd6fbb6fa0fa227b18df85 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_college_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2e14a2330f2d29e420258f3cc10d5490dfd407be2d597cd661710c7f609012 +size 60915 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_college_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_college_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20649d5db4e492ad994176dd92b1e1f8e9302072 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_college_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a1f6155cecd66ffd93ba580548af6eaaba105bc69e3af58c1c86c62447e8da7 +size 25745 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_computer_security.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_computer_security.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c65e5f0bb28cab9cbe992db895ef3f8965ea8447 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_computer_security.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b8a66996997c82f02fd0153a9e9708ad7ca2bd077147953ab4c294cdebf3f2 +size 25776 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_conceptual_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_conceptual_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b007094e528435edb917483171678bee51b4136c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_conceptual_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418bd1419bbb8478d3929e579cf7318018cd8514d6b412f9ad3ef0d0cf644a8d +size 46569 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_econometrics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_econometrics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..111f6deaae4901b5116ab6d8f970ec18dbf8582b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_econometrics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ce09b4a207f3dd13bf8eed5356963090004492d155e32e3433f84194dc2fdb +size 31487 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_electrical_engineering.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_electrical_engineering.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2084d03b26a203f7a10e372190316998a5f32a0a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_electrical_engineering.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2656541822a1f4c41eaf90ece17ac2602a9f25e02b92e4f89fb687e88bb0addc +size 28785 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_elementary_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_elementary_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d78ca70564328bfeeb10a3e549af385ee90d03bb --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_elementary_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99fb1f80c1d0e25e0f7607ac09874fc5ad330f9a9015e25aa7360ebbfc65ff74 +size 74810 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_formal_logic.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_formal_logic.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e087f8171afc891e7da0ec3da388396bc1b01852 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_formal_logic.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49559c974d7797c26a3a8b924523861606e51e56726f97d14fef2946d2f9e469 +size 30145 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_global_facts.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_global_facts.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1929e6c8c001c6f0ea35019976312531bbe62ef8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_global_facts.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ad7b52f3bfce3c0c00f016cdb45200ae9834582fc074f8d56a2c7e57dfe9a5 +size 19159 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_biology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_biology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6fdffbac05e1d8bda5c01d8cd01e79b0726d847 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_biology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e6ab2736f3f59bbf02e69304677b2d75bc587035b03e9070c04be70649592c0 +size 87931 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_chemistry.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_chemistry.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73cd80be967c289d13885363a87c84c2ed15462c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_chemistry.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c969288ef8bc7a09f427d90b00b24ff7f70ebe8061b91ba3242697fd3c9ee8c +size 50021 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_computer_science.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_computer_science.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..602405b8c83949059a02df1514e219549e3fab4a --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_computer_science.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f826681613f0d37e4d6c296f21a6ee7d765348203f3df5feac80e02c7426860 +size 31618 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_european_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_european_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d424c19d94f646fa3f654ec0ed98c65c39020891 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_european_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9186a3f64693f31c5041cac7dabb6f4ca3208ede565d4be6241199ceac13f7d +size 145398 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_geography.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_geography.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45e7b8ff423822af4a609d6dd6b6f3c7cbde8a74 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_geography.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29095baf1c09ba9878c91190c1ead37072b913a7bb5b06bae89c6ceffb3dd4bc +size 44528 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_government_and_politics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_government_and_politics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..899a1041ee3aa7b794574d3b7aaf68bc28bcf62c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_government_and_politics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd70a836b8d514f10c2efa6b386bfd8d6f56024e388949df305114c00ca049f +size 54320 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_macroeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_macroeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1be29318866b5300e3fb1093474f9f749f50453 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_macroeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a909fb4570e157202523065028d6b5b2a954a14f3dc092d07befbb494007a172 +size 92849 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_mathematics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_mathematics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..465403b4ecfc4a8c9a3761c878c3c121702ce7c5 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_mathematics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9817d6a69180cf26666df66342e4bb2e6b9cc48707bc85e8c6df6ddfec3c18 +size 57224 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_microeconomics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_microeconomics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77d249dc73c24f44e793fccea0a7c8712c16f1e4 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_microeconomics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d229be45e92ece0da4716910accd3855ff8dfbfdd2fa989c6280e77ef4135ee +size 58437 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_physics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_physics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f7a8b90b7e25b3d3ee8c11e6b12ec64eb6c228d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_physics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19be9888385a3e9fb5fdd91d2c8b30e31e43ad9041be9cfaededc15307c6f93 +size 42934 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6aa3d9e124724743003d5920e195110ff825c2d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f54a2974aa27fe79aea3f979fa404e698aa9d782240a050b7f8696caaa6fa56 +size 143806 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_statistics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_statistics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f405d41d4394300a63f30417e5131d4d18a387cd --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_statistics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f17726610cb835e8ec5b9434dbd8e083e8bde64d6d61596a5f57c3337b982192 +size 71255 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_us_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_us_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5dfc2e440db60f03995a0c3b46ecc502663a2832 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_us_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:975ffd466aaf99c065d098d0f1aa2dc3187e6401365d132ed1022ad2b9252e54 +size 161876 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_high_school_world_history.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_world_history.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31a8d501b0921c3288da1b3cd767a04a7d272c19 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_high_school_world_history.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e1cd2d8051444ebf7e67f24c10b30e2a160caaa2756b332c5b79bb15d2d6730 +size 209404 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_human_aging.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_human_aging.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16591695bce455f9d93a54f02fd7b3b0bca3feae --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_human_aging.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da1c7af4a30db95b94e4ae2e52a54e70c6c0038f1a6d985bd63baa080a6293c +size 49942 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_human_sexuality.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_human_sexuality.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2d4deff03dd8a2042fbbae20e6ded282c0cfce7f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_human_sexuality.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13fb67a946a676921d5738f268221d5452cd5248de897cda10f2bbc3f65b581f +size 31518 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_international_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_international_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c88e6681aa4af29fedceaa4f13fbba90f2bd1b96 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_international_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9bdac2f4bae88871fdc485244b37b9b97e4b8bbf98c68eb15503d804ed02d49 +size 36088 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_jurisprudence.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_jurisprudence.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6289aa6215994a92bd0e74b772633a892abbd902 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_jurisprudence.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb6991d841bf08373666d339d5b248763193d9b6d80c2ca8603c7a3708e9654 +size 29509 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_logical_fallacies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_logical_fallacies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..40f6f2b9d1565280e05157ef334a9547fb5b33bc --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_logical_fallacies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eee7e673d1538c5a496c9edf670e26448f291a1cf6ab67715cf5bb74c32a0afe +size 40334 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_machine_learning.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_machine_learning.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24ef35c42657e92b32b3e1c05300c70a266e244e --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_machine_learning.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac1b3794f289ac5ed0822fe4c01d541682273ac6c1e6e31a1694a1f3a75f6ce +size 27047 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_management.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_management.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03df13b9cb8ece8fd7ff6d0c1aedf4a63d33288f --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_management.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a1b48830973db46d83dfe312d417bcb3c5b01db4fd091305136a00e9f1d67d +size 21661 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_marketing.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_marketing.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec08d71f79fdcdd6bae4990250f8fccfb8c1dd5b --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_marketing.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:721a23db246025a7c903f932370babd8dae0e7b6df299c58b7d0306a4fc26fed +size 57418 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_medical_genetics.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_medical_genetics.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8de97fbb6e300d36e19f096d4bb4c4220cdf596 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_medical_genetics.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2048fe23e52876caaa2587d1ce85edf80d97dd35f674a24e95797e1422acfec +size 22284 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_miscellaneous.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_miscellaneous.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d08542131c97a0b7a1ed245327c20abd86bab26d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_miscellaneous.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011d5e4ce6127ed3fbb2eaae64e0095859b8e0d9261e425a3ac9de982819297f +size 169075 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_moral_disputes.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_moral_disputes.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c9c4d5a287b89b1628ba57a713d96bdd793336d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_moral_disputes.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c663ca6d11b130d18dc197176890554b6e0f840d61eb4b2f67b26adf4e08de1e +size 90289 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_moral_scenarios.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_moral_scenarios.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b6080f4501657af70919519c6134f6708f53db97 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_moral_scenarios.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfaf4e34ab495a10e79d0e5dff229b282c7364a3fd0ac516437381f264fa3fe7 +size 154069 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_nutrition.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_nutrition.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d937ddacc2b01d9e3d9986450f02b4c28087884 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_nutrition.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d7666db575df9ed75e1fb0d4b3d23d6d9d021a4964df04d78b4751e799e4ee +size 79445 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_philosophy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_philosophy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..432c83f536e1c4064f549f1ef5d92cbecdb04e20 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_philosophy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c148ec0307f7f788032924c8fdc20861933e55b0ce3f49c233f46d4b6243042 +size 74357 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_prehistory.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_prehistory.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b783b7d7189569fa165cd0b88640f2ece2a08c15 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_prehistory.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99698d8525c975a73026ed360bb5d674a0fd641f0f9e43bced62e2a1064634f9 +size 82522 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_professional_accounting.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_professional_accounting.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3570de42ee1c1c2b2bd78416d9801bf4d463eea8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_professional_accounting.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7caa0ca58f276ce7556c4e12d566aa1346e4963ab3f95a9515e9b8b65fa6c9e4 +size 90018 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_professional_law.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_professional_law.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30e1599e47f3b14419a4db7afd68e28571871106 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_professional_law.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae9670bc4d5c62035b55fa1b88250a8d606015db616813d8ed20ee183335ab32 +size 1005706 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_professional_medicine.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_professional_medicine.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be3cf136368b19bcf23b062eba10ee5d9417c0f9 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_professional_medicine.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf7cd82b1ad46ef2fdc49beb9ef40644add35141176065269ad98922fc977f5 +size 133206 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_professional_psychology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_professional_psychology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8081996ea4719f72a7cda6ea2f55c1f5d7f92c49 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_professional_psychology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4739c87c1a08af6903e6aed6315d96fdcf83fbf3d42be3a9d0a9797582eaee1e +size 183180 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_public_relations.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_public_relations.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5396be6a696c463eae244f4420510a222c3d0f8 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_public_relations.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a879a1144608ea1604c4371b4db6a9dfb127f165587bc83a5d44d2539413ab0f +size 27535 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_security_studies.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_security_studies.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30235caa6637e30979ed764da2fa0ac0dda7eecf --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_security_studies.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081306d41659fb9a51c9ef1233de880c56502fed6dff5feed92605a4ad83ee60 +size 120464 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_sociology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_sociology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5cb82f43f5d232990a5db23aa29c850cdb19f45c --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_sociology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0354aee9d60a263915d11e056694cdbce815ef4c2be73812f22bc9eb459a8aec +size 57972 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_us_foreign_policy.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_us_foreign_policy.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9d266cd5dc755070189d3d8c2d2a9065c74ba35d --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_us_foreign_policy.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49250e9cd43bd9e9889bca51a251e92e3f7c5030f5ff6747dbf39687b55ca117 +size 25389 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_virology.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_virology.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82b56a4fcc792b118ae2146b35e3f7327660c3ba --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_virology.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7846c6f6c528316be8010d1278cd4bd7a951639c6b5ecce0428a0fd12fda4f0a +size 39474 diff --git a/eval-results/mmlu/0/ckpt_360/mmlu_world_religions.jsonl.tar.gz b/eval-results/mmlu/0/ckpt_360/mmlu_world_religions.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..73fd8799b96dff787a163a36242134b51f700a60 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/mmlu_world_religions.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:504637f7556868c58cebae644ccda81e38d34cf25378f48b3772200a59976480 +size 32913 diff --git a/eval-results/mmlu/0/ckpt_360/results.json.tar.gz b/eval-results/mmlu/0/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..139e2eaf3223fa28b77c27d47ddf11093e4570c1 --- /dev/null +++ b/eval-results/mmlu/0/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4931d7d27749f1d8ac0bc2d549c566d71198a591d8d570c04e9848a032c6c869 +size 7602 diff --git a/eval-results/race/0/ckpt_003/race.jsonl.tar.gz b/eval-results/race/0/ckpt_003/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3d4531091aa8eb28e0cb6e72693d94677bb8558 --- /dev/null +++ b/eval-results/race/0/ckpt_003/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9b8739bc6790b3b0180a045fe4bb586ce0eaccd3716e7663d1859f253f2bdb +size 1343453 diff --git a/eval-results/race/0/ckpt_003/results.json.tar.gz b/eval-results/race/0/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3966f989ae0b7c6b8d871cba46a1629594ceb8fb --- /dev/null +++ b/eval-results/race/0/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf51354c928107d0121d41706fdbe6a925966a846f707d4dc4ec1b9f5842f15 +size 2874 diff --git a/eval-results/race/0/ckpt_006/race.jsonl.tar.gz b/eval-results/race/0/ckpt_006/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..86635c153dc749b226f719f395107b868dc524a0 --- /dev/null +++ b/eval-results/race/0/ckpt_006/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eba961adf5ad613d1150e3a017f05d231aec519de61a807c2ccd1f51713612e5 +size 1343448 diff --git a/eval-results/race/0/ckpt_006/results.json.tar.gz b/eval-results/race/0/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..eadf4abb75f007e9c4a11f57e4c6c98d77b501e6 --- /dev/null +++ b/eval-results/race/0/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53e6ff02a1052de377f1ccfcba8d998341356c9bc85de4ffb4d1d4e1e152b883 +size 2874 diff --git a/eval-results/race/0/ckpt_009/race.jsonl.tar.gz b/eval-results/race/0/ckpt_009/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f0c63379fe3ed86c000a4255cafc9ffebc470ea --- /dev/null +++ b/eval-results/race/0/ckpt_009/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ce7fcc47b5bdab12467c2a8d894902474083b07aba9eb465c2b4ba8a39c5ba6 +size 1343410 diff --git a/eval-results/race/0/ckpt_009/results.json.tar.gz b/eval-results/race/0/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19c8e54708936db3db15840115c8262fa04ca546 --- /dev/null +++ b/eval-results/race/0/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e9770976e8f91b51374a90d479451c2445610baf3682b1561b90c40039b888 +size 2875 diff --git a/eval-results/race/0/ckpt_012/race.jsonl.tar.gz b/eval-results/race/0/ckpt_012/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..503915ae50f60a57c77a33275a08db5e070a7d02 --- /dev/null +++ b/eval-results/race/0/ckpt_012/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff6a4182d9e87eafac52b4f7ee45eb12bab7d05aeb885d1872fadb0f9acc1e5 +size 1343363 diff --git a/eval-results/race/0/ckpt_012/results.json.tar.gz b/eval-results/race/0/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e848a2b41237cd57f662d1746b9e7f2b31aac5da --- /dev/null +++ b/eval-results/race/0/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dffb64942c8d63fb64d1480a1e6dd1fef42f54567ec96f090d581ad076e3208 +size 2875 diff --git a/eval-results/race/0/ckpt_015/race.jsonl.tar.gz b/eval-results/race/0/ckpt_015/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdc33d5af45e152eb733e5f23896a99064b1c5f9 --- /dev/null +++ b/eval-results/race/0/ckpt_015/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:198245fc7ee5dbe3824b3ea416d6e1d6a56468ba4c287d214d4efd4bc3e4260e +size 1343504 diff --git a/eval-results/race/0/ckpt_015/results.json.tar.gz b/eval-results/race/0/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1617bc77c0ae86ba3ebeade5b9d80b8d32c53503 --- /dev/null +++ b/eval-results/race/0/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930407299489cb3a53ff15e24977cafeb19d53020a0d4179af6abe6503bcfd1a +size 2875 diff --git a/eval-results/race/0/ckpt_018/race.jsonl.tar.gz b/eval-results/race/0/ckpt_018/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b65ddf1496e87157321a432e6ef5e77f4bcc7589 --- /dev/null +++ b/eval-results/race/0/ckpt_018/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e2190780465a4755bdf5c60a1dd9ce84fd02518d4e5dacd9d8b814b86d3c06 +size 1343455 diff --git a/eval-results/race/0/ckpt_018/results.json.tar.gz b/eval-results/race/0/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c2f0cece48a2ead4fab9f4250f4f7e1faf1caf9 --- /dev/null +++ b/eval-results/race/0/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d92c34dcabbf695d4259fbba4252baa6c1ea0a1be75920eb57aa2cb54cdbc98 +size 2872 diff --git a/eval-results/race/0/ckpt_021/race.jsonl.tar.gz b/eval-results/race/0/ckpt_021/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03ae61d4186e62b787f133f74c55e27b14993a5f --- /dev/null +++ b/eval-results/race/0/ckpt_021/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84ab839f1f8a68ba472884cf66ebb762bee2c61b3077e7aa97a669ff181d1e84 +size 1343348 diff --git a/eval-results/race/0/ckpt_021/results.json.tar.gz b/eval-results/race/0/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47ff9dda3a9edb43c74c4326680b568f1f807562 --- /dev/null +++ b/eval-results/race/0/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08dc19341ccf12062a0cd981ec727f65a0983fa5686a4b0a28d6dbbfc7e60fdf +size 2905 diff --git a/eval-results/race/0/ckpt_024/race.jsonl.tar.gz b/eval-results/race/0/ckpt_024/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63729b56dac8716dc946e9869db8945c17a0cfbd --- /dev/null +++ b/eval-results/race/0/ckpt_024/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6ff6b8a2b17a74dd53f5117e428484729fe161e732733a5a63afaa10babc6a3 +size 1343396 diff --git a/eval-results/race/0/ckpt_024/results.json.tar.gz b/eval-results/race/0/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..259c29dc7d4c81c88d42d4c17a14b1f3c882b4c0 --- /dev/null +++ b/eval-results/race/0/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a527d07112fc1aee84719377f33bdc931f0b84ef5d810c99cdda46dd09cecfa +size 2906 diff --git a/eval-results/race/0/ckpt_027/race.jsonl.tar.gz b/eval-results/race/0/ckpt_027/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0b3d25b1d5f81dc0290031d944bace0130996fe --- /dev/null +++ b/eval-results/race/0/ckpt_027/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f73f121ee68858fe2fda9ffda851e34c7dda4821a3a9a73c5ea9e29abdcdb79 +size 1343582 diff --git a/eval-results/race/0/ckpt_027/results.json.tar.gz b/eval-results/race/0/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..05fd5f446be697d91780999b6dabef4846bd7197 --- /dev/null +++ b/eval-results/race/0/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1216e633796089b08db5a9174f583dd21416b659a83e189f92da497af84c3d9 +size 2875 diff --git a/eval-results/race/0/ckpt_030/race.jsonl.tar.gz b/eval-results/race/0/ckpt_030/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7916ac74b3f96f1939cc698b7f3c4c8a09b7911e --- /dev/null +++ b/eval-results/race/0/ckpt_030/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f90d440239036fcfe5f2ada780222c6624fc9cd4e10a0659bf7195e459d0f3be +size 1343368 diff --git a/eval-results/race/0/ckpt_030/results.json.tar.gz b/eval-results/race/0/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6210c68e69d7b177037704a5c69d66e54dcc921 --- /dev/null +++ b/eval-results/race/0/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44900c0031a2551be6688fc7c76e6820ab7bcf808fc05d9c92a83c8f2e5d0fa0 +size 2866 diff --git a/eval-results/race/0/ckpt_033/race.jsonl.tar.gz b/eval-results/race/0/ckpt_033/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2591bcc8e10e37ca946b8c41315d425101c220d0 --- /dev/null +++ b/eval-results/race/0/ckpt_033/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7276195149b94d970f62efa3403197c244017545f46897322bf8a0006fbdfdbb +size 1343403 diff --git a/eval-results/race/0/ckpt_033/results.json.tar.gz b/eval-results/race/0/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5aad21eb2fee22a8f0bbd29d3694531054c1dea7 --- /dev/null +++ b/eval-results/race/0/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c373ebfbd6ba262c8e8cbc83807604eba9c137ef74a3755883b433e544e4b6b +size 2868 diff --git a/eval-results/race/0/ckpt_036/race.jsonl.tar.gz b/eval-results/race/0/ckpt_036/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..84de2e5e604f93bb10db42e364f710094aabf3f3 --- /dev/null +++ b/eval-results/race/0/ckpt_036/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977012e63435abf7f12b2fd21262c3c3736e7cd72a1f59c2a82a6049b9dd5f55 +size 1343537 diff --git a/eval-results/race/0/ckpt_036/results.json.tar.gz b/eval-results/race/0/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e63f6c17e7b707b1c631889fd85e6aaac603ce0 --- /dev/null +++ b/eval-results/race/0/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4bde222b30425e25e9649ce220cccc1accd1417d3c0aa1849f4a7726d0e40a5 +size 2872 diff --git a/eval-results/race/0/ckpt_039/race.jsonl.tar.gz b/eval-results/race/0/ckpt_039/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..852dddf7c5b8ea55a3ac7c40e526d6364510541a --- /dev/null +++ b/eval-results/race/0/ckpt_039/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb31b9e94216f92fcbbb225686353ae2c73c46628be6a332b6cc92971ff71123 +size 1343464 diff --git a/eval-results/race/0/ckpt_039/results.json.tar.gz b/eval-results/race/0/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50b33d113f44c482a0d8608e7e2141dbb78f102c --- /dev/null +++ b/eval-results/race/0/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317ac293f7c37e6d20c13072a29336aba16ce064f09fa45aebe54e68b13163e0 +size 2875 diff --git a/eval-results/race/0/ckpt_042/race.jsonl.tar.gz b/eval-results/race/0/ckpt_042/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0a4743ea3b7592dd4fb30369baf63bb90060cc7 --- /dev/null +++ b/eval-results/race/0/ckpt_042/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a34e40b53be5914e4a40cb945338efb10d9c45c13752449b5b3e265f24cd08 +size 1343463 diff --git a/eval-results/race/0/ckpt_042/results.json.tar.gz b/eval-results/race/0/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8dc61139702f36e677664dbd9072bb64a425764 --- /dev/null +++ b/eval-results/race/0/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adb42df03fbd378c7f34ff32a57418211a6d4d70a0ccc7f7092b4b7bb2ea506c +size 2873 diff --git a/eval-results/race/0/ckpt_045/race.jsonl.tar.gz b/eval-results/race/0/ckpt_045/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1946bfdbbb8fe044afddc919ea5ad280276f4f91 --- /dev/null +++ b/eval-results/race/0/ckpt_045/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5ab3ba102f1a3258da3d8b9d0cc6ba8ec51ce76d7c41a26d05e5069517cacf6 +size 1343351 diff --git a/eval-results/race/0/ckpt_045/results.json.tar.gz b/eval-results/race/0/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1b4429af715ba6e5f1ca7ba32b899e423d6ee8da --- /dev/null +++ b/eval-results/race/0/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:855ec23639658553c12cb020e3a34a4bbad5cc0ab201f32af3d3a8ed30c29709 +size 2877 diff --git a/eval-results/race/0/ckpt_048/race.jsonl.tar.gz b/eval-results/race/0/ckpt_048/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f4da78cb4c8b4604f0cf783d638ef95d8c347eb --- /dev/null +++ b/eval-results/race/0/ckpt_048/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8566a834ec7b1458a793c70b5f2b6f20e446b26810456846ef99a123c624ad54 +size 1343462 diff --git a/eval-results/race/0/ckpt_048/results.json.tar.gz b/eval-results/race/0/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..482d7dd4ee218ef904f4dfa98a0544ebcb3f1ddf --- /dev/null +++ b/eval-results/race/0/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:056b3e0dffd10f9487172df658341154d22e38dc1473e91802167490a0a43e9a +size 2872 diff --git a/eval-results/race/0/ckpt_051/race.jsonl.tar.gz b/eval-results/race/0/ckpt_051/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55fa8af26433bc56492388570283ec474a9c1747 --- /dev/null +++ b/eval-results/race/0/ckpt_051/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3eb9e113def4c0f2be35cc13065cab963c59979dff5d1486ff80adfdf2a812a +size 1343428 diff --git a/eval-results/race/0/ckpt_051/results.json.tar.gz b/eval-results/race/0/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1aeba2c4af0d0b878bb4c0c86387bb75ce6af15a --- /dev/null +++ b/eval-results/race/0/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d824d6a75048e1a9d0cdb08d4c11f6263021ba709450d04bc8de904a631f4d2 +size 2875 diff --git a/eval-results/race/0/ckpt_054/race.jsonl.tar.gz b/eval-results/race/0/ckpt_054/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaccc94975009521d42109693f6350c7497b173c --- /dev/null +++ b/eval-results/race/0/ckpt_054/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5c47711145ea423c593c42d737bb7cd9db4adf3a5d971cbc3ad4eabdfa4abb +size 1343434 diff --git a/eval-results/race/0/ckpt_054/results.json.tar.gz b/eval-results/race/0/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77314cce51894a63ce7782794d97c77b76f94f22 --- /dev/null +++ b/eval-results/race/0/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e34cfb257baf05e9a22aa476ac3cb47719d87e9544d4c24005b2afa57728ed +size 2905 diff --git a/eval-results/race/0/ckpt_057/race.jsonl.tar.gz b/eval-results/race/0/ckpt_057/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd7456262d12a1011f36d3ff5b6784a181ff689d --- /dev/null +++ b/eval-results/race/0/ckpt_057/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a0efc3634e1b6d90193b481f49645f2b8a0886515cfe4dc17aa13bf19fce7c +size 1343410 diff --git a/eval-results/race/0/ckpt_057/results.json.tar.gz b/eval-results/race/0/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6814eb60e2a7c4816c30793ba4a58d4ef19308f4 --- /dev/null +++ b/eval-results/race/0/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476158c090367f7da405ffb2f4a3687b4ba770c0594c64a75629857f2fcbbac1 +size 2876 diff --git a/eval-results/race/0/ckpt_060/race.jsonl.tar.gz b/eval-results/race/0/ckpt_060/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..96fafd6eb4b7ea242a5dc83af6182c6c5e329891 --- /dev/null +++ b/eval-results/race/0/ckpt_060/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b07709a1f43cfdf5236672a79d3886c3ca6a05403cd2d458d920920c089c258 +size 1343535 diff --git a/eval-results/race/0/ckpt_060/results.json.tar.gz b/eval-results/race/0/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d303524ca16544f70a6565672a93e1867df1735 --- /dev/null +++ b/eval-results/race/0/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b775ccdca1165522af3bf534fee356f6171f671a5e40c69e6e771aee55dc709 +size 2874 diff --git a/eval-results/race/0/ckpt_063/race.jsonl.tar.gz b/eval-results/race/0/ckpt_063/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..576a90fe29b74a6cc39aa01209d14d51d3a71d51 --- /dev/null +++ b/eval-results/race/0/ckpt_063/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c67bab17fb88a53d58f957b79c8d7ae90466a38430e80d6dc68ea299a2f3f18 +size 1343368 diff --git a/eval-results/race/0/ckpt_063/results.json.tar.gz b/eval-results/race/0/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0924c97fa5c6d1945228865d9452fd865368a0a8 --- /dev/null +++ b/eval-results/race/0/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a5a95a8cafd2037381757ed043ba213173a3eaa56fbfdb97a19a0e6d9907c3 +size 2875 diff --git a/eval-results/race/0/ckpt_066/race.jsonl.tar.gz b/eval-results/race/0/ckpt_066/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a072ef5d608be5d840daccfc9247c75bff33ffa --- /dev/null +++ b/eval-results/race/0/ckpt_066/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1570e0461c4872b87a33f3fc0a846312f2eeae6a6824e11e66097af2b2fba8e7 +size 1343382 diff --git a/eval-results/race/0/ckpt_066/results.json.tar.gz b/eval-results/race/0/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58563ba13d2d720c2a68155c74ee947aa947735a --- /dev/null +++ b/eval-results/race/0/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272771795f5091e25d318280c4a095d1cebbd28e22c490e39e37085409476341 +size 2874 diff --git a/eval-results/race/0/ckpt_069/race.jsonl.tar.gz b/eval-results/race/0/ckpt_069/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f021c98c3d02774cacb43af5e193c8eb565b7c5a --- /dev/null +++ b/eval-results/race/0/ckpt_069/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adac2164a0fbd3e4026d1ea1c7b524e574858ba1ef715b6a07c19416f8338372 +size 1343471 diff --git a/eval-results/race/0/ckpt_069/results.json.tar.gz b/eval-results/race/0/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ca840f186600db6974e8c4a0e0b711f982807e22 --- /dev/null +++ b/eval-results/race/0/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70bfe477ad144f77c053109e589ad1b7220b7b9aa85c127f4a60a58775a4236 +size 2873 diff --git a/eval-results/race/0/ckpt_072/race.jsonl.tar.gz b/eval-results/race/0/ckpt_072/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..403e67a539d42f8aa5bbd48d95e49aeb82ad7290 --- /dev/null +++ b/eval-results/race/0/ckpt_072/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1f18b88b7ee4613b0c2933bfbf8376e07f10a7985163ec7664d3ba805a36ab +size 1343292 diff --git a/eval-results/race/0/ckpt_072/results.json.tar.gz b/eval-results/race/0/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..39e4fc9a0b11d2b2271ef0c976cc8c02a0706dcb --- /dev/null +++ b/eval-results/race/0/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1393eefdbe9c1714ffd721e2e40c4211320a42d1b20f563e8f0e46196efbfc9 +size 2872 diff --git a/eval-results/race/0/ckpt_075/race.jsonl.tar.gz b/eval-results/race/0/ckpt_075/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ed10d6ebe6358f66624f4e1586d95b518a7a502 --- /dev/null +++ b/eval-results/race/0/ckpt_075/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73c8061445d94e3a5a13c2651ea1f4923c80ab9a45ff9e5a77940f801e10852 +size 1343528 diff --git a/eval-results/race/0/ckpt_075/results.json.tar.gz b/eval-results/race/0/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa6eaa3f1880f3d366a045897938915906397f77 --- /dev/null +++ b/eval-results/race/0/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7219b5283417425823a6045ee735b7590eb782af2df73a2b5593c041938ff36 +size 2878 diff --git a/eval-results/race/0/ckpt_078/race.jsonl.tar.gz b/eval-results/race/0/ckpt_078/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cb01436db1b8edf87f3a6214cc2703c0f360050 --- /dev/null +++ b/eval-results/race/0/ckpt_078/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5be59777c040b8a3e0161db6c56f071781a2a81c3e06ae870c115ad8f9ac30f +size 1343362 diff --git a/eval-results/race/0/ckpt_078/results.json.tar.gz b/eval-results/race/0/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bc25bc7e35e8b64238b314a43de1e1d09c2aaac --- /dev/null +++ b/eval-results/race/0/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d16ce8228bfb95e637eb15167ab0a3e508b09a092ed4d1de49c11c05b92689 +size 2874 diff --git a/eval-results/race/0/ckpt_081/race.jsonl.tar.gz b/eval-results/race/0/ckpt_081/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58cbced78a816e8d0cbc80dbbb84e777ba318dba --- /dev/null +++ b/eval-results/race/0/ckpt_081/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42bb8e2b8ad472e31f794ab372bd1b41e9806a6fd1bbcc2dc450756972e8d75c +size 1343429 diff --git a/eval-results/race/0/ckpt_081/results.json.tar.gz b/eval-results/race/0/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f57720ea560090e7c371fedd1cc83e2d928f6dc --- /dev/null +++ b/eval-results/race/0/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a6edba40809f4b9748073e6a56a1a22e8994e3273ab3febd041200e0d07668a +size 2874 diff --git a/eval-results/race/0/ckpt_084/race.jsonl.tar.gz b/eval-results/race/0/ckpt_084/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a27e99539ee718e15fae75d20e5ec4621c36518e --- /dev/null +++ b/eval-results/race/0/ckpt_084/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4082074397038afb45754b9981162dd00a51d91491e3846e125f6e99965fe6be +size 1343300 diff --git a/eval-results/race/0/ckpt_084/results.json.tar.gz b/eval-results/race/0/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74d1dd2de971c2b6b467ed71636fb4ef94d95724 --- /dev/null +++ b/eval-results/race/0/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75d2e9608815e8c9dee74a1624449363f86462d0674b0077f6433b85c6869a6 +size 2874 diff --git a/eval-results/race/0/ckpt_087/race.jsonl.tar.gz b/eval-results/race/0/ckpt_087/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0c61aa02054af7f6cf9dc85af0e60306364cb74 --- /dev/null +++ b/eval-results/race/0/ckpt_087/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea85430000db5e4e88dd309a2e917c387f0d9f9879a7c8bfdbef39d889a9eb6 +size 1343406 diff --git a/eval-results/race/0/ckpt_087/results.json.tar.gz b/eval-results/race/0/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..164331a1798c2adf7a13b32728f3318fbd9c7d03 --- /dev/null +++ b/eval-results/race/0/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95592961e7dab50020cef96edb66997b712e3543ff6936c142298efc3562531 +size 2874 diff --git a/eval-results/race/0/ckpt_090/race.jsonl.tar.gz b/eval-results/race/0/ckpt_090/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffc2e5dc6c567bf97ba9f4382176195086670c31 --- /dev/null +++ b/eval-results/race/0/ckpt_090/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60f7e65362aae055cb550e49cff154339cd2c7097533b22fd8d470ca9319809 +size 1343431 diff --git a/eval-results/race/0/ckpt_090/results.json.tar.gz b/eval-results/race/0/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed7e01af2dfdf395bd391c19aebd0ce102234410 --- /dev/null +++ b/eval-results/race/0/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552d5b06ddfeeaa0f13b9c393896730edd775fed6db40fda62d16732ec88fa92 +size 2875 diff --git a/eval-results/race/0/ckpt_093/race.jsonl.tar.gz b/eval-results/race/0/ckpt_093/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..770c6415708502920d733419d7b5f1f1b0c92622 --- /dev/null +++ b/eval-results/race/0/ckpt_093/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f88b477ef4f3677f027fcaaa5e9c2c03557ad8c869c4c0d7a70e58939aa9eb7e +size 1343448 diff --git a/eval-results/race/0/ckpt_093/results.json.tar.gz b/eval-results/race/0/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fc5b527c69a25103e5aa7b3996299e9910dcb1c --- /dev/null +++ b/eval-results/race/0/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d31f3302b75ef5b6261788486a735b732756da8c8b66e06923ba1708d1a3cb +size 2876 diff --git a/eval-results/race/0/ckpt_096/race.jsonl.tar.gz b/eval-results/race/0/ckpt_096/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fb8e4fbd32258e76d129fadc6cb85f5eb553416 --- /dev/null +++ b/eval-results/race/0/ckpt_096/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11eb12f4aec9d449b1503d187b0df85c9412b97fc86c22b4b8934740ef3acb76 +size 1343434 diff --git a/eval-results/race/0/ckpt_096/results.json.tar.gz b/eval-results/race/0/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e3437097d95523fb5cc065c3cb920278b74f842 --- /dev/null +++ b/eval-results/race/0/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00c6663c52c2a40d4cd6f8c09e8b2fc4a95e782ed80cf8cfd3d25b9c24fbaf64 +size 2875 diff --git a/eval-results/race/0/ckpt_099/race.jsonl.tar.gz b/eval-results/race/0/ckpt_099/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..55c1cf21a45c75612ba104c2eb9b9d3f3390785f --- /dev/null +++ b/eval-results/race/0/ckpt_099/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9233985630a6897a837a491ac36f1b6595648e0083a709e4a491f9c629d4246 +size 1343403 diff --git a/eval-results/race/0/ckpt_099/results.json.tar.gz b/eval-results/race/0/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6107b5520a0186eecc18d93aed540911eabf376 --- /dev/null +++ b/eval-results/race/0/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91299efdbd5218b95c044dc9c4cb0be7a5a8acffe6cb665605ea6894056119c0 +size 2904 diff --git a/eval-results/race/0/ckpt_102/race.jsonl.tar.gz b/eval-results/race/0/ckpt_102/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..87c199a2bc34dff27f1a1ca09feefae49170af0f --- /dev/null +++ b/eval-results/race/0/ckpt_102/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01604f3877eb0b75307fcfd41f21a213caa720324979f8fbc4e674ebc646f0c9 +size 1343453 diff --git a/eval-results/race/0/ckpt_102/results.json.tar.gz b/eval-results/race/0/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..24eee4c8d59c2983f1c6e4fd3b1e0997864fae0a --- /dev/null +++ b/eval-results/race/0/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f69b8603c6c08bdabf1b3a85faa9a4e54b43b968f779026809a6d8ad4dc9ea +size 2876 diff --git a/eval-results/race/0/ckpt_105/race.jsonl.tar.gz b/eval-results/race/0/ckpt_105/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..98719089f12fa7cf4c8253307e6f1b28c48cab49 --- /dev/null +++ b/eval-results/race/0/ckpt_105/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:878ee7ed33f66401ba6cfb087c25b8a06158247712964b2793e16e81b2013e92 +size 1343470 diff --git a/eval-results/race/0/ckpt_105/results.json.tar.gz b/eval-results/race/0/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53b1f0d611bf2c7dbf969c51dd44f16ad95ce629 --- /dev/null +++ b/eval-results/race/0/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3959ec787148b6b1756b3af8819db3d7d714a83062318fa790ffcdabdc4f195f +size 2878 diff --git a/eval-results/race/0/ckpt_108/race.jsonl.tar.gz b/eval-results/race/0/ckpt_108/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e071bbc924c862bc51f4c7bbe7637a97f87aac5 --- /dev/null +++ b/eval-results/race/0/ckpt_108/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776200dfbf28fba70b785d10ed51cf7a61e25a586c63f059aa736c6c259c9672 +size 1343359 diff --git a/eval-results/race/0/ckpt_108/results.json.tar.gz b/eval-results/race/0/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..74cc2601166d2f06f2d7b8a330d6eb0194ec6f54 --- /dev/null +++ b/eval-results/race/0/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f527b6000d2b381df7eae789a1de5230425be1b3e17cb2c9810b32105c387fa +size 2875 diff --git a/eval-results/race/0/ckpt_111/race.jsonl.tar.gz b/eval-results/race/0/ckpt_111/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1219921a53e26a261102e6afd092cb9879d1ba8f --- /dev/null +++ b/eval-results/race/0/ckpt_111/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:667846f7b8675c16663b0f7b1d6f2f9be7956e8e818dee754df0580028b42dc3 +size 1343411 diff --git a/eval-results/race/0/ckpt_111/results.json.tar.gz b/eval-results/race/0/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed3613cebb1f572d17d31fe282d8e0a6c24a50b0 --- /dev/null +++ b/eval-results/race/0/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73b769567af581dc97fec9c050a757c1d9e7d45657ba28897f4d653918e4784 +size 2873 diff --git a/eval-results/race/0/ckpt_114/race.jsonl.tar.gz b/eval-results/race/0/ckpt_114/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ac8d4415070ce2d3729a3928bdf46a6896e5cf8b --- /dev/null +++ b/eval-results/race/0/ckpt_114/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545e5214812e160f4488927ef351bd96207046f48b72ca45d7e704ab7431afa2 +size 1343443 diff --git a/eval-results/race/0/ckpt_114/results.json.tar.gz b/eval-results/race/0/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcbc8d94f3732c42c77893d037c7d311a8732b69 --- /dev/null +++ b/eval-results/race/0/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f39f3fbc8fc7fbfd490cbb304047b235d29c7d29f900f325a9cb0c1dc0439baa +size 2870 diff --git a/eval-results/race/0/ckpt_117/race.jsonl.tar.gz b/eval-results/race/0/ckpt_117/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e22d39e351a854156eec4e848f23ad59e3b5bf2f --- /dev/null +++ b/eval-results/race/0/ckpt_117/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae18ae243453564c30ab10fd9a76738501941bd13a74b06f8b910ad65d1fcd2 +size 1343380 diff --git a/eval-results/race/0/ckpt_117/results.json.tar.gz b/eval-results/race/0/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9b8182ec059fe167f8562716ec2d01d627689f6 --- /dev/null +++ b/eval-results/race/0/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dda2b72ee5e6e49348c94a48148e25a488f3b697df46c074fd698d81b1b935b +size 2874 diff --git a/eval-results/race/0/ckpt_120/race.jsonl.tar.gz b/eval-results/race/0/ckpt_120/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7c87c98afc1c1951e0cb94101ff0051488f4f07 --- /dev/null +++ b/eval-results/race/0/ckpt_120/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a182055ae230216fab1eb350eef64626ef3f831e39cfbf962da31b8dea0fa77 +size 1343494 diff --git a/eval-results/race/0/ckpt_120/results.json.tar.gz b/eval-results/race/0/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d8ecb85d3ab5558610eed9fa558f271fa459046 --- /dev/null +++ b/eval-results/race/0/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1e3d0f85e91e48b85d735bc828fc7af56dfec5dce1afd8830064120b4e778d +size 2873 diff --git a/eval-results/race/0/ckpt_123/race.jsonl.tar.gz b/eval-results/race/0/ckpt_123/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bc0522d3b98c5e07a9ec8a02fac69ee0a4ffc694 --- /dev/null +++ b/eval-results/race/0/ckpt_123/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60a92e43395ddd010b9a758124c23dc3f9330552d526bd3784be7fd328609beb +size 1343554 diff --git a/eval-results/race/0/ckpt_123/results.json.tar.gz b/eval-results/race/0/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee4ca9d0bda131ad06c86aea964c9b1d3680cf80 --- /dev/null +++ b/eval-results/race/0/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce89edac5104e828cc065ee54bcd5dfe6cda7ec8c7becc74938371782c128ea3 +size 2862 diff --git a/eval-results/race/0/ckpt_126/race.jsonl.tar.gz b/eval-results/race/0/ckpt_126/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c96414596ab608370002fad652eaf3fc6e2aff7 --- /dev/null +++ b/eval-results/race/0/ckpt_126/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92d967dce2e5cecb47d626543bda242029cbeed5a76eba2a4e3ea5b4e8eb825d +size 1343517 diff --git a/eval-results/race/0/ckpt_126/results.json.tar.gz b/eval-results/race/0/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7153821888ad7f2ed966d68df65621d15ed3007c --- /dev/null +++ b/eval-results/race/0/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95fa61252579f9912a38de6b8e87bb8f548d802b52df1c8854639b0b04fdfd0e +size 2875 diff --git a/eval-results/race/0/ckpt_129/race.jsonl.tar.gz b/eval-results/race/0/ckpt_129/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2f541aed17a76cf412f2656a275a8dc122f7c3d --- /dev/null +++ b/eval-results/race/0/ckpt_129/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f32ee64c6a658ae19160f2f19c2ff597e21d6e4bd1119a3bfd1fa2c8309e27 +size 1343544 diff --git a/eval-results/race/0/ckpt_129/results.json.tar.gz b/eval-results/race/0/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4c40ab6a48edce38c37e175c850dbf18950023d1 --- /dev/null +++ b/eval-results/race/0/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0430b373da7c748c42b4fb2d41e81af80250bef82730f82dc5b8ee7e06e35f2 +size 2875 diff --git a/eval-results/race/0/ckpt_132/race.jsonl.tar.gz b/eval-results/race/0/ckpt_132/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5842328d950131a496750d12927200f159417cc4 --- /dev/null +++ b/eval-results/race/0/ckpt_132/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdeffec10097aa28e33b7f08fd4d81e73278dcdd55f3860212afd716b084343d +size 1343435 diff --git a/eval-results/race/0/ckpt_132/results.json.tar.gz b/eval-results/race/0/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4523c6ed2eba2a8354438a88c886b0e5f3562bb1 --- /dev/null +++ b/eval-results/race/0/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78795f787fbe401f5f6c73ba3b01f2c05b971576fb2b389c1a48e189f60f46c2 +size 2874 diff --git a/eval-results/race/0/ckpt_135/race.jsonl.tar.gz b/eval-results/race/0/ckpt_135/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b81a60e39629f55a2661cf3460d193931bab0dc --- /dev/null +++ b/eval-results/race/0/ckpt_135/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb3cabc35239f1286a7dc48aaa26a4c164a40af3469e70051e79d9b142e4d4b +size 1343394 diff --git a/eval-results/race/0/ckpt_135/results.json.tar.gz b/eval-results/race/0/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8616d66241f2517748ee9e101cf7abec7900549d --- /dev/null +++ b/eval-results/race/0/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7856baf069187420d8331ccfbbb5700293350470fe2f1d7a6e545111c472a688 +size 2877 diff --git a/eval-results/race/0/ckpt_138/race.jsonl.tar.gz b/eval-results/race/0/ckpt_138/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..045a00a0c8325b1a9c5a044d38d8709c905cf00b --- /dev/null +++ b/eval-results/race/0/ckpt_138/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315400977487db4cd4c7a8f0c7d2cd4af1eeeda2322d9b4623ee886851ad430d +size 1343245 diff --git a/eval-results/race/0/ckpt_138/results.json.tar.gz b/eval-results/race/0/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8e1d625bed01e277a23dbbd072bd06e6b774adb --- /dev/null +++ b/eval-results/race/0/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f4a5439d96f311fb7f4b71002b732525d5debf6ff872f22558e1fd5849095a +size 2873 diff --git a/eval-results/race/0/ckpt_141/race.jsonl.tar.gz b/eval-results/race/0/ckpt_141/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e1cca1c133a6dca376c3e3af053e36cafd52715b --- /dev/null +++ b/eval-results/race/0/ckpt_141/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0958903f744ec25e7a1e9f5f0021dd5436f1a984c541edf648cc869c6a8303 +size 1343520 diff --git a/eval-results/race/0/ckpt_141/results.json.tar.gz b/eval-results/race/0/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db563cd6af423f4f083b5421d8ec7511175da6a9 --- /dev/null +++ b/eval-results/race/0/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3b2242309becf4f64885afd7b29a10fff6ac5e967388c1e53add683c03c0ec +size 2876 diff --git a/eval-results/race/0/ckpt_144/race.jsonl.tar.gz b/eval-results/race/0/ckpt_144/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdd074e8479d82a603633ed8bb294870586e1b38 --- /dev/null +++ b/eval-results/race/0/ckpt_144/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30bec55131f9383ab77f2d8dd26d9a93393de7d5e2571ab7a75640f2a050909 +size 1343449 diff --git a/eval-results/race/0/ckpt_144/results.json.tar.gz b/eval-results/race/0/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be97ce4bfbc0f028efb876fa9c032a0a1a6644a8 --- /dev/null +++ b/eval-results/race/0/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919538eda0859958d97b75b06029f786c36283dd71f4370b4dd741b68243a8ce +size 2876 diff --git a/eval-results/race/0/ckpt_147/race.jsonl.tar.gz b/eval-results/race/0/ckpt_147/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48c9d9ce769f8f5354d9f74d078a1aa1c71fe5ea --- /dev/null +++ b/eval-results/race/0/ckpt_147/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed8f0bef3f46466b45eeb43ac11913b5debfc5530e0efc6f391b5645ab96b46 +size 1343371 diff --git a/eval-results/race/0/ckpt_147/results.json.tar.gz b/eval-results/race/0/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..435195768b583e75252de984b7859ce831ad82d6 --- /dev/null +++ b/eval-results/race/0/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5fe092118680b193defa709870d48bb5b75e7faf74a290f1c2539236fd9c6ba +size 2879 diff --git a/eval-results/race/0/ckpt_150/race.jsonl.tar.gz b/eval-results/race/0/ckpt_150/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b21d7419dfb1fb08decd7ca657f3ca8c7854add --- /dev/null +++ b/eval-results/race/0/ckpt_150/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71575901f0def8b8e572dbd5884813c87d82ac633d5220f90e4c8760652507b +size 1343449 diff --git a/eval-results/race/0/ckpt_150/results.json.tar.gz b/eval-results/race/0/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5d2031c570582dfe09935fd584adafd8d3affd3 --- /dev/null +++ b/eval-results/race/0/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a039ef9a9887eb7a8aa4f5e16268c850595151f884204abaa0b0f1dd2917ae43 +size 2875 diff --git a/eval-results/race/0/ckpt_153/race.jsonl.tar.gz b/eval-results/race/0/ckpt_153/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..afb21eae38d7f00bb7200e986d12a0487ca848f5 --- /dev/null +++ b/eval-results/race/0/ckpt_153/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed9010820972a486c60fb7644061a308cf0e57b18d4789182349bd4fcca1187 +size 1343440 diff --git a/eval-results/race/0/ckpt_153/results.json.tar.gz b/eval-results/race/0/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03ac02fd8ed213c925694abec4d77a8ae95f6d28 --- /dev/null +++ b/eval-results/race/0/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b1d6926f00de173a55d3048615b955da781b884d3ec2f66c3f3376d5a5db4ba +size 2877 diff --git a/eval-results/race/0/ckpt_156/race.jsonl.tar.gz b/eval-results/race/0/ckpt_156/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8ad2405e7e44fce41b68971aadf33922e3d1916 --- /dev/null +++ b/eval-results/race/0/ckpt_156/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a828d035b525bc34698dbd5e42ad4a3c215fbefb80712d0d1256cadb83f10713 +size 1343470 diff --git a/eval-results/race/0/ckpt_156/results.json.tar.gz b/eval-results/race/0/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7d8f3a6e2a0efa3514ef84f001fbc8dd43b29e92 --- /dev/null +++ b/eval-results/race/0/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b7364f15b44de86f2cec05445b04e17bef55af437411978116313127707e2d7 +size 2876 diff --git a/eval-results/race/0/ckpt_159/race.jsonl.tar.gz b/eval-results/race/0/ckpt_159/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..324e6e0b7ce77e9aebe8d162bea4e678861e53d0 --- /dev/null +++ b/eval-results/race/0/ckpt_159/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d9848f5649d781a9d89d2bac634c9ff209671e93968ceff48ed827ee3741cb +size 1343504 diff --git a/eval-results/race/0/ckpt_159/results.json.tar.gz b/eval-results/race/0/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..64e575b70a7ba40823c04b54e8bbb8767b550f5f --- /dev/null +++ b/eval-results/race/0/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c833a18869323559d0cc9eec74c24f7f42373ea623cba2a7a93fe64e5021cdf +size 2873 diff --git a/eval-results/race/0/ckpt_162/race.jsonl.tar.gz b/eval-results/race/0/ckpt_162/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b9b38801242e7c19d9675c17fac173c9bfae3ba --- /dev/null +++ b/eval-results/race/0/ckpt_162/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e033f4d63a36134d061794212c0018e8a6184a248a14189c4d900e57911e6f1 +size 1343392 diff --git a/eval-results/race/0/ckpt_162/results.json.tar.gz b/eval-results/race/0/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..47c25120d5bb7192912905852cccd086ec8cf0cd --- /dev/null +++ b/eval-results/race/0/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:227cf52edeed678bd1db3ee5c44157b6a465d7ac860330f8ff8ac28df8871ec5 +size 2876 diff --git a/eval-results/race/0/ckpt_165/race.jsonl.tar.gz b/eval-results/race/0/ckpt_165/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d774752a9de97965d668c686d6c24b131b9a49d3 --- /dev/null +++ b/eval-results/race/0/ckpt_165/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13899b448bb4bf854659b511edb5d0e973d31083d49a225781966fd4c35f518e +size 1343439 diff --git a/eval-results/race/0/ckpt_165/results.json.tar.gz b/eval-results/race/0/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5c1b7e313a546fc6ad769b32baacabb68b26ac0 --- /dev/null +++ b/eval-results/race/0/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea56718a083c69857a326a4be6a3de5dc1bf24806f38dcb8403e6b8afe15a81e +size 2875 diff --git a/eval-results/race/0/ckpt_168/race.jsonl.tar.gz b/eval-results/race/0/ckpt_168/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3c6e047347c2be90a5f055cf50935aaf826bb4d --- /dev/null +++ b/eval-results/race/0/ckpt_168/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb7c57b76bee8514e62db95dfab1b1da7517cfc631800ac59bac0d3a89198cc +size 1343433 diff --git a/eval-results/race/0/ckpt_168/results.json.tar.gz b/eval-results/race/0/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0474132cdf0afe46f08da5d3050b73750d42808 --- /dev/null +++ b/eval-results/race/0/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a977844d6e9af9ffb7d4e0f75c0a5791a946cea4d077b5bc0a608e68163b08 +size 2875 diff --git a/eval-results/race/0/ckpt_171/race.jsonl.tar.gz b/eval-results/race/0/ckpt_171/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc0c9c7bb29168a28ff47e6ecd8e0466d89d8ce7 --- /dev/null +++ b/eval-results/race/0/ckpt_171/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4081205351b8c23572cbf93d6c6077eafca7989b5a38dbe3e25455d73c16966 +size 1343352 diff --git a/eval-results/race/0/ckpt_171/results.json.tar.gz b/eval-results/race/0/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..263cdfea9ab460175b76a39bfefcc223f8e2883d --- /dev/null +++ b/eval-results/race/0/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476233f5379829a49c5c568f48f3b7ad8028a9755cae99f16d55c6ada95f90f3 +size 2876 diff --git a/eval-results/race/0/ckpt_174/race.jsonl.tar.gz b/eval-results/race/0/ckpt_174/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d0fcb9964f7111ce2e7ef81867c7c5942d8ca65 --- /dev/null +++ b/eval-results/race/0/ckpt_174/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bea8537da25beff925f8326700ddcedb41394e928860b46730e1581d2f485471 +size 1343326 diff --git a/eval-results/race/0/ckpt_174/results.json.tar.gz b/eval-results/race/0/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dd62be9427a886d04ff401f59b97278af00bb01e --- /dev/null +++ b/eval-results/race/0/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fad9de8a621169eda674c26d2743b6d45082d24b69cf6e703209a23c61b59a70 +size 2875 diff --git a/eval-results/race/0/ckpt_177/race.jsonl.tar.gz b/eval-results/race/0/ckpt_177/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7267bed0b35b7c2199a88caaeb3820c1c69d175 --- /dev/null +++ b/eval-results/race/0/ckpt_177/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fde2c04be32da3a69d457b241a5d63269bcaf41102db0db7e9f2bd507bfe768 +size 1343423 diff --git a/eval-results/race/0/ckpt_177/results.json.tar.gz b/eval-results/race/0/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ccece8b7d236c21c89015d424594a638748d3f4 --- /dev/null +++ b/eval-results/race/0/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d147b1543fb5024172282f43ba328ef9e73efb2b38486886446d970ba4b452c +size 2876 diff --git a/eval-results/race/0/ckpt_180/race.jsonl.tar.gz b/eval-results/race/0/ckpt_180/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c74ac474b38ffa9b1a86f0346d4a26a8eeb6043 --- /dev/null +++ b/eval-results/race/0/ckpt_180/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06779f3a70fd69faccae3fb9df9c76bace49b6a004c5f6281851dc230f27a9ad +size 1343437 diff --git a/eval-results/race/0/ckpt_180/results.json.tar.gz b/eval-results/race/0/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65107452858ef78e73a6277eec0af4f0f1c83ab4 --- /dev/null +++ b/eval-results/race/0/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92419a28cf2610462d60acd47a6a1cb47d4659a65e4fb16f0a9e3503f42f825b +size 2876 diff --git a/eval-results/race/0/ckpt_183/race.jsonl.tar.gz b/eval-results/race/0/ckpt_183/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c03c602ab6d1392c689fc90676d7bb7bfbc2fb8 --- /dev/null +++ b/eval-results/race/0/ckpt_183/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27fc2b8f7149f46d938c97c0348aea15998c08bc61e9c73a0be946def618df3b +size 1343479 diff --git a/eval-results/race/0/ckpt_183/results.json.tar.gz b/eval-results/race/0/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b25f41a9f044c877a617aab5bba718eec31b2dab --- /dev/null +++ b/eval-results/race/0/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5d2782cb74d0611e8089d43c138899c7bb1896f744ec49defddf6f0aa45a7da +size 2875 diff --git a/eval-results/race/0/ckpt_186/race.jsonl.tar.gz b/eval-results/race/0/ckpt_186/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..35ea5383f7797204fd0c41f5defb2adce2a295be --- /dev/null +++ b/eval-results/race/0/ckpt_186/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53da83aee0d50c32221cc0d1e924316ccd3a2d0df85022a018a3db0af78fa882 +size 1343458 diff --git a/eval-results/race/0/ckpt_186/results.json.tar.gz b/eval-results/race/0/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f730e09ae4231252f2e33fbb60218e6bc5b9e03e --- /dev/null +++ b/eval-results/race/0/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d416427da6fc65c1a8485a14fc01c202d6a4b9b1d15c42e0da2eb79cee7b30e +size 2874 diff --git a/eval-results/race/0/ckpt_189/race.jsonl.tar.gz b/eval-results/race/0/ckpt_189/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec1a58f070ad7f1fc43c1a919ce8f18b449e56ba --- /dev/null +++ b/eval-results/race/0/ckpt_189/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ace963a371bbb0c5223bba7d546b8d7c860070ada240af7ab0848146b9f07f +size 1343328 diff --git a/eval-results/race/0/ckpt_189/results.json.tar.gz b/eval-results/race/0/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0744d304934c69ba225073f731bf36ededb5e8e5 --- /dev/null +++ b/eval-results/race/0/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce61d33ad42f9017c7b2904b5bd44af1934849af9fc7e284f1dc28c9885aaaa1 +size 2874 diff --git a/eval-results/race/0/ckpt_192/race.jsonl.tar.gz b/eval-results/race/0/ckpt_192/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80d7486e68dce756668e42b00a245e4bff154047 --- /dev/null +++ b/eval-results/race/0/ckpt_192/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968d3e3d967b6a993cce9a98867dab72579f229b38adb7e1ee6880cb10ef15cd +size 1343451 diff --git a/eval-results/race/0/ckpt_192/results.json.tar.gz b/eval-results/race/0/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c84d6af9b3cdd414beaac9d9e3994ff7f6ec7248 --- /dev/null +++ b/eval-results/race/0/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96cc4de5ddb89f242057049ed74b101ac0cf487b1d11e0cd5d0ae372adf95df +size 2877 diff --git a/eval-results/race/0/ckpt_195/race.jsonl.tar.gz b/eval-results/race/0/ckpt_195/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19c40e775ee53412d3bba494217d28bcbe9390c5 --- /dev/null +++ b/eval-results/race/0/ckpt_195/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:516d714f03a4509ca24644091526c32bbaf4ed0e8e3fe198394ef421bc0532f2 +size 1343448 diff --git a/eval-results/race/0/ckpt_195/results.json.tar.gz b/eval-results/race/0/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ad606ef4da5d2a4890cb1bdafe98e56e7b94253f --- /dev/null +++ b/eval-results/race/0/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b918c5dc4102a2934514d1735560077ebee8f1a701a28bb613f808dc948c947 +size 2875 diff --git a/eval-results/race/0/ckpt_198/race.jsonl.tar.gz b/eval-results/race/0/ckpt_198/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bddd7a3f7c433cc661bab9e959999abc6be49c08 --- /dev/null +++ b/eval-results/race/0/ckpt_198/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71992cf814b8bd20f154e8ecc9ad7307848789ba1a50881d525a1bb9f0a9056b +size 1343456 diff --git a/eval-results/race/0/ckpt_198/results.json.tar.gz b/eval-results/race/0/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..83a2b5267c7bbe05879b40d72d2c718d859b90e9 --- /dev/null +++ b/eval-results/race/0/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b447707ba386a72bcece25d8f08efe8ff1f77b747f00a053e7f2ced36e5f7b59 +size 2875 diff --git a/eval-results/race/0/ckpt_201/race.jsonl.tar.gz b/eval-results/race/0/ckpt_201/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8825fb13365b07f2a66fa608b1cd5ed9debd338 --- /dev/null +++ b/eval-results/race/0/ckpt_201/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a441e20b7d867c90552df635a5e76f12dbee5c3e591ba68a7f15483cb0c8515 +size 1343520 diff --git a/eval-results/race/0/ckpt_201/results.json.tar.gz b/eval-results/race/0/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e2647dd36f929266376fb6c324e6e9bb6a5b909 --- /dev/null +++ b/eval-results/race/0/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126ba5fb7e05c4c2bc363a49671122a367c9a08c540725495d326498105c3997 +size 2875 diff --git a/eval-results/race/0/ckpt_204/race.jsonl.tar.gz b/eval-results/race/0/ckpt_204/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f9bce87461c0739491ac01c0d2da8ff8a413347d --- /dev/null +++ b/eval-results/race/0/ckpt_204/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc7c1459bd5b67fd0d76b95f30e6471750f65aa2df52a4c2fcf715e3abdf633f +size 1343374 diff --git a/eval-results/race/0/ckpt_204/results.json.tar.gz b/eval-results/race/0/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..082d15f5714bf8c25607c1b6e730e8a4e8e90096 --- /dev/null +++ b/eval-results/race/0/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf383b4f4e74b04d8934bad75f3ebe98ec99b77119f7d851444f064c774598e +size 2875 diff --git a/eval-results/race/0/ckpt_207/race.jsonl.tar.gz b/eval-results/race/0/ckpt_207/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0f93d11cf3385f2e03b6668e49e07746cc9227e --- /dev/null +++ b/eval-results/race/0/ckpt_207/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755e001df4bbfa4c7b50c984bc136ddcd5aed744724cc03d41f51965208ad9b9 +size 1343416 diff --git a/eval-results/race/0/ckpt_207/results.json.tar.gz b/eval-results/race/0/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68a26b1f1ba034c95962bb0b5cf23f11d050de7b --- /dev/null +++ b/eval-results/race/0/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96e0d85e6f75fc24f0b627fb09435cf0dae9655e64bc1277a3f4b4c3dcede89c +size 2876 diff --git a/eval-results/race/0/ckpt_210/race.jsonl.tar.gz b/eval-results/race/0/ckpt_210/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee2b8d6681a23855628b564a11ede98fc86bd402 --- /dev/null +++ b/eval-results/race/0/ckpt_210/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e182d17401062294148f84264b68a435fe38dea267a175d449d2384f08be7893 +size 1343484 diff --git a/eval-results/race/0/ckpt_210/results.json.tar.gz b/eval-results/race/0/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2a60dabc25c41118c03d4b13e9bb540ce3672f7 --- /dev/null +++ b/eval-results/race/0/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a52dc1b7540b8cbb4e8db8fe082c3b99035602bd0969357c4964494cf1fb56c +size 2875 diff --git a/eval-results/race/0/ckpt_213/race.jsonl.tar.gz b/eval-results/race/0/ckpt_213/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..777823ba5b15a7b99263c69a957f2140a75b147d --- /dev/null +++ b/eval-results/race/0/ckpt_213/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55656102e9fc976643cd873b6ffc5acc53ddc3c954483b80c57623e55c5b5dbf +size 1343423 diff --git a/eval-results/race/0/ckpt_213/results.json.tar.gz b/eval-results/race/0/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85617dd1699eeef3b4c28ba9fa1e65c547200a5b --- /dev/null +++ b/eval-results/race/0/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03f5734ef748df19d4c0d948ac34ff0610e6b6a2bcf3e7dab5aba910138eb499 +size 2873 diff --git a/eval-results/race/0/ckpt_216/race.jsonl.tar.gz b/eval-results/race/0/ckpt_216/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..231d0e14b1b053c94ae0e73b6a21568a474129e8 --- /dev/null +++ b/eval-results/race/0/ckpt_216/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc1a1a39a3085ac66dddf38deff3c09c0e4ae661b74c715fee0f309399ffae12 +size 1343520 diff --git a/eval-results/race/0/ckpt_216/results.json.tar.gz b/eval-results/race/0/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..da9fb86659bef77a68bcebff5927f11b0be69d57 --- /dev/null +++ b/eval-results/race/0/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65485e8f5354a95c9f43892ca9debb3eaa53dfad44e11231e994d6621e87f84f +size 2870 diff --git a/eval-results/race/0/ckpt_219/race.jsonl.tar.gz b/eval-results/race/0/ckpt_219/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..785c19c6b1287ddd0c2dd00f36cb745d7cec7080 --- /dev/null +++ b/eval-results/race/0/ckpt_219/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd32ed994a30d84a0a42f9e428860d86adb9d80296919681f8ab3646e6b77c3d +size 1343328 diff --git a/eval-results/race/0/ckpt_219/results.json.tar.gz b/eval-results/race/0/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5615eef92bc80def3b6c850c7b08d0558260119d --- /dev/null +++ b/eval-results/race/0/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac6e2eb50c5a0c8e20a31d4e3a71e1ed6d69c8fb84e3a84b9aa9784eeefe0bb6 +size 2864 diff --git a/eval-results/race/0/ckpt_222/race.jsonl.tar.gz b/eval-results/race/0/ckpt_222/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1daa86edc0e786941b4edfc61dec5b36c81e03a5 --- /dev/null +++ b/eval-results/race/0/ckpt_222/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c2484dbaae3b06b65dd0c2dd6b7d8447ba6df32a5d7ac152a83b8731a9a9f34 +size 1343348 diff --git a/eval-results/race/0/ckpt_222/results.json.tar.gz b/eval-results/race/0/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a891e44bb9783c28230054b1c3bb4a03a5b814fb --- /dev/null +++ b/eval-results/race/0/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a38f3c5bd20d23d239905faa36985a1744ddecb1c1e284e77201a3c7fdcff2 +size 2873 diff --git a/eval-results/race/0/ckpt_225/race.jsonl.tar.gz b/eval-results/race/0/ckpt_225/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65de3ef27e6b0f8c429dc9bf5388a2fb570bd05e --- /dev/null +++ b/eval-results/race/0/ckpt_225/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7fc8e328123c09ce6332ea25beac37b94e469eb10dd59d562d1fd9d73edaeef +size 1343433 diff --git a/eval-results/race/0/ckpt_225/results.json.tar.gz b/eval-results/race/0/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fff84d052a8e72e706ce0d2b62c830c869c7d74 --- /dev/null +++ b/eval-results/race/0/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d8abf65c3da34cefc618b3a03bbea1576fe0c3174ae09574305b088f5ad62c +size 2874 diff --git a/eval-results/race/0/ckpt_228/race.jsonl.tar.gz b/eval-results/race/0/ckpt_228/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..097554efebb2f9ff021498da094176a7d7110291 --- /dev/null +++ b/eval-results/race/0/ckpt_228/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bdc4ab52fc6b6c7940f2129fb820ad1c1c433ac70ebd44c8f9dac7e0f500ad3 +size 1343581 diff --git a/eval-results/race/0/ckpt_228/results.json.tar.gz b/eval-results/race/0/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70f3dbf5f691dea6ba75d6c96bd1604c9ce67940 --- /dev/null +++ b/eval-results/race/0/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcba5b4792e4fa89f52ee03d7af54c89c989de3329a49c2e2833f95b093f5133 +size 2875 diff --git a/eval-results/race/0/ckpt_231/race.jsonl.tar.gz b/eval-results/race/0/ckpt_231/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..071583bb670e715e9b22fd35f9e75692a6f80764 --- /dev/null +++ b/eval-results/race/0/ckpt_231/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a807306d422e72bae2610c93867f92c2968b5d62c898fddbbf34b4981b35dde4 +size 1343466 diff --git a/eval-results/race/0/ckpt_231/results.json.tar.gz b/eval-results/race/0/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b56b79e091e37e94bfb8ad04b3773073c2b8083d --- /dev/null +++ b/eval-results/race/0/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cbeb54edcce19aaaae323f3e14185111f4f7da8bff24fdff3f1512cbbb7e290 +size 2878 diff --git a/eval-results/race/0/ckpt_234/race.jsonl.tar.gz b/eval-results/race/0/ckpt_234/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f19d24bc9132ea58d56a26b74c370e1a0fe1754 --- /dev/null +++ b/eval-results/race/0/ckpt_234/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea26a9227a0a68eb8806008340463f15b0f9a8d49aa05dfbe5db2292cea6e44 +size 1343492 diff --git a/eval-results/race/0/ckpt_234/results.json.tar.gz b/eval-results/race/0/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa73a12de9dd01f0e26b261f035c7a21ea5268a1 --- /dev/null +++ b/eval-results/race/0/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b367dfddc7f7ac447a74e5c20b38c9d4e89e2ef63cb8cde9fd9d3a5c978ddccf +size 2875 diff --git a/eval-results/race/0/ckpt_237/race.jsonl.tar.gz b/eval-results/race/0/ckpt_237/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f13636141e5c582ab272ae56bce83e98c6990d6 --- /dev/null +++ b/eval-results/race/0/ckpt_237/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f9562b2d2523b0a2d8880d03228a0cea8da5fff5789b7a4a9199a5e40f9e0d +size 1343503 diff --git a/eval-results/race/0/ckpt_237/results.json.tar.gz b/eval-results/race/0/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ee646aa51e83458ac3fab37c2c3abf4f711fb72 --- /dev/null +++ b/eval-results/race/0/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f6adbca058824b78df3dde0b4279ed24f38b33723aa08a693bac7242a4d208 +size 2876 diff --git a/eval-results/race/0/ckpt_240/race.jsonl.tar.gz b/eval-results/race/0/ckpt_240/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30f821f5cc6f5953bebf3b91b2722dd7d28966de --- /dev/null +++ b/eval-results/race/0/ckpt_240/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ffcf8895cfc2023bab9543b4532979eaf70c8c4170fc03b095341804caa63f +size 1343492 diff --git a/eval-results/race/0/ckpt_240/results.json.tar.gz b/eval-results/race/0/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..112bb4f693009b81d2e4b848bfb9925ea460c231 --- /dev/null +++ b/eval-results/race/0/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee7b6afa2b765db8cc6a68099c6e9d5c86e0fafd0f805ee20c8c0cdd6bc109d +size 2876 diff --git a/eval-results/race/0/ckpt_243/race.jsonl.tar.gz b/eval-results/race/0/ckpt_243/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f59748ce530b47c9e53ae14cebf4060348e21a18 --- /dev/null +++ b/eval-results/race/0/ckpt_243/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c4a1ea80c923b03db275f0a567eab68209dbe98e5595673e10f28b1bd46ed8 +size 1343446 diff --git a/eval-results/race/0/ckpt_243/results.json.tar.gz b/eval-results/race/0/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8d05d03aee4c1fa5b7b5df06d56ea2465e80fad --- /dev/null +++ b/eval-results/race/0/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5be12406e0cbb2cd2ca4d79c26378c2bddb51aaa920425df75a5a0d786a7fa +size 2873 diff --git a/eval-results/race/0/ckpt_246/race.jsonl.tar.gz b/eval-results/race/0/ckpt_246/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5234f3c2c8a8b5cc8b5b97f5e6e2b3a5b5165d90 --- /dev/null +++ b/eval-results/race/0/ckpt_246/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09a3ae0889f5003d5bf6ce69bea0dd5c2820d477ccb6b35594e46b5f00eccbd +size 1343406 diff --git a/eval-results/race/0/ckpt_246/results.json.tar.gz b/eval-results/race/0/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b01dd17e62951d7941cba532f787f32db38a3d2d --- /dev/null +++ b/eval-results/race/0/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df51458d18dcf74c7629c888656b02ccffa032c65fbc834bd233907ea78697fe +size 2875 diff --git a/eval-results/race/0/ckpt_249/race.jsonl.tar.gz b/eval-results/race/0/ckpt_249/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d53cdb780c68d4d3ec0ee560dfd2d0eda7275dcb --- /dev/null +++ b/eval-results/race/0/ckpt_249/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28fc167fe75ccb84989b0e859484cbb10a59494dcf6bd5379238d153f5019118 +size 1343465 diff --git a/eval-results/race/0/ckpt_249/results.json.tar.gz b/eval-results/race/0/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..376bce5c19a2f789ae1299a35479716f5059779c --- /dev/null +++ b/eval-results/race/0/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c25aa6003e6257eb20678ea14d1a08cb435c1cf5dedb19e469d108cb448695 +size 2908 diff --git a/eval-results/race/0/ckpt_252/race.jsonl.tar.gz b/eval-results/race/0/ckpt_252/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f540bdd3819429f8444f72c268fc8751e90f0009 --- /dev/null +++ b/eval-results/race/0/ckpt_252/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b5498791bbe399df99d7ecc3de995115c2c3d8f9348833befe8a31e2515a4f +size 1343387 diff --git a/eval-results/race/0/ckpt_252/results.json.tar.gz b/eval-results/race/0/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d965afb8d4bb74ba16de75dbc59efe9f3706ee20 --- /dev/null +++ b/eval-results/race/0/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdef70509e5c37fa7f0f7423cf1a08fa5aee04a1e46c9be69058d4d3cad4b126 +size 2875 diff --git a/eval-results/race/0/ckpt_255/race.jsonl.tar.gz b/eval-results/race/0/ckpt_255/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6a6e35bf8995ae082d7a33489743b5a054fd2f1 --- /dev/null +++ b/eval-results/race/0/ckpt_255/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a70e2dc7e6a9a44ff29282c5f2bfbacac18702a67f5daeab24e4573ad7b9b3 +size 1343499 diff --git a/eval-results/race/0/ckpt_255/results.json.tar.gz b/eval-results/race/0/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c24adf177e4aa537fcf2d8925d1555144f3e6ec --- /dev/null +++ b/eval-results/race/0/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d550a39bd9dd48330e807e3339525ee083272acbd759ba7121d83da64b4c4f +size 2873 diff --git a/eval-results/race/0/ckpt_258/race.jsonl.tar.gz b/eval-results/race/0/ckpt_258/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec0328ec82b095e6f432f0afc729db7bad3235c9 --- /dev/null +++ b/eval-results/race/0/ckpt_258/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d18ea6708c5e69cb5776ffc9644997aee674134ca4bf53c06027388ce8072ee +size 1343478 diff --git a/eval-results/race/0/ckpt_258/results.json.tar.gz b/eval-results/race/0/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6dce31041395f909752d1ec4227ee38c5d90e2ae --- /dev/null +++ b/eval-results/race/0/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c604017fcf8312d56f2b611c4efab02a4752339a8238864e73eef0bb8b1ed378 +size 2908 diff --git a/eval-results/race/0/ckpt_261/race.jsonl.tar.gz b/eval-results/race/0/ckpt_261/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0c0197ede2d5074c407b0ed1fcf8d2c275145ba --- /dev/null +++ b/eval-results/race/0/ckpt_261/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22eb529e65540529313fc92a93d732e9fcc34f0da1376f3c1bb71ae7feb004a4 +size 1343485 diff --git a/eval-results/race/0/ckpt_261/results.json.tar.gz b/eval-results/race/0/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..943537c60acf9975fc490a2cecbda20b500034fe --- /dev/null +++ b/eval-results/race/0/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:599c6d8a6073a88c168339e929c051a4047d3ecae25ff44a080e916525907509 +size 2874 diff --git a/eval-results/race/0/ckpt_264/race.jsonl.tar.gz b/eval-results/race/0/ckpt_264/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82e5230651690855600f03314e96e0c137eae163 --- /dev/null +++ b/eval-results/race/0/ckpt_264/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aa7709168f0f7465f53a35e8f9eb60a72011df47d00f383b2d06b6e817d7661 +size 1343543 diff --git a/eval-results/race/0/ckpt_264/results.json.tar.gz b/eval-results/race/0/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bcb161c4c354d2b163c4e63e94c2eac19fedcb1 --- /dev/null +++ b/eval-results/race/0/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f14761ab68dc9558170a7086460bcdff6162a21d2a48c8cb2e7a9e2f0f9134ea +size 2874 diff --git a/eval-results/race/0/ckpt_267/race.jsonl.tar.gz b/eval-results/race/0/ckpt_267/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..329641c038c3bf54251c3d5aa4d7d31626f56993 --- /dev/null +++ b/eval-results/race/0/ckpt_267/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d04b4580df170319c73ae6ba514fd9b977e4f1dd5f3e5eff31596b69d79de1d +size 1343510 diff --git a/eval-results/race/0/ckpt_267/results.json.tar.gz b/eval-results/race/0/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92e24e0849ceaad4508ccec018c6d2e1fd786dac --- /dev/null +++ b/eval-results/race/0/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a79ac8bef2a290868a650038be5aeef7735644018d0124e35da2af8f42d65d5c +size 2876 diff --git a/eval-results/race/0/ckpt_270/race.jsonl.tar.gz b/eval-results/race/0/ckpt_270/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f8fdd8268ca6aca322aace251064fb33b99f880 --- /dev/null +++ b/eval-results/race/0/ckpt_270/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9db76fd5f430baa40ff593b11478e1ec9b2b79edb5ec84a4cbd4c4ca5afe3ca1 +size 1343536 diff --git a/eval-results/race/0/ckpt_270/results.json.tar.gz b/eval-results/race/0/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c5f7d2d02029c35879da1ff1d12da62cf68bb0a --- /dev/null +++ b/eval-results/race/0/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa750beee5073e89b6924ac38d00837a4cf5e9b08ab34b2454aaf0efa1b87bee +size 2876 diff --git a/eval-results/race/0/ckpt_273/race.jsonl.tar.gz b/eval-results/race/0/ckpt_273/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a896c50a8109ce318f60ec7ec04084a5f651ea4 --- /dev/null +++ b/eval-results/race/0/ckpt_273/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd93964f648a48a6cdb36c726104c324f1717cf6b00fb93a5b998af286e2bf6c +size 1343502 diff --git a/eval-results/race/0/ckpt_273/results.json.tar.gz b/eval-results/race/0/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..79b6f3029c64474bfe5b440da59e0ff8431b51d4 --- /dev/null +++ b/eval-results/race/0/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f115b203c289ba50e05b84e29da43a22a09b53faa8a5c7f957fb36232a7cfe +size 2878 diff --git a/eval-results/race/0/ckpt_276/race.jsonl.tar.gz b/eval-results/race/0/ckpt_276/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..877cec7fb217040a497493c2d7201e719af95cc3 --- /dev/null +++ b/eval-results/race/0/ckpt_276/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5ca91076988cbc8749bb8fbee7a7bf873923ab363d36a62ac64310b42a88106 +size 1343553 diff --git a/eval-results/race/0/ckpt_276/results.json.tar.gz b/eval-results/race/0/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..569c15d250e781bdffc27270b1dc2f78b5e7d958 --- /dev/null +++ b/eval-results/race/0/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb5b092679eb7095b7db8ab505521258c9d756206c52bfd567252734d2f0082 +size 2876 diff --git a/eval-results/race/0/ckpt_279/race.jsonl.tar.gz b/eval-results/race/0/ckpt_279/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed798416ba499d1daf31bf1953a3d55641d7cc62 --- /dev/null +++ b/eval-results/race/0/ckpt_279/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78b8f7bf90d7c738f57b9a1c25a996ce63649e942f1f7aff02dceae5d7be21cd +size 1343453 diff --git a/eval-results/race/0/ckpt_279/results.json.tar.gz b/eval-results/race/0/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d855a561ad2d44f0cb49e3bca585e56f4a0a9bf9 --- /dev/null +++ b/eval-results/race/0/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2fbe2103e622353f8d344501c9bde9c157148f47128b51234c450ce32a8769 +size 2909 diff --git a/eval-results/race/0/ckpt_282/race.jsonl.tar.gz b/eval-results/race/0/ckpt_282/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e9ff60fafc89a67c6948ca4746998b8aa8301ff2 --- /dev/null +++ b/eval-results/race/0/ckpt_282/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b63c6319d0079609b6bb38584f550226ea8aca1c0d4abc69fe14e9e51a33c3 +size 1343453 diff --git a/eval-results/race/0/ckpt_282/results.json.tar.gz b/eval-results/race/0/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82e05e20a49d2c207fbcd6f1dacc65c3658e363e --- /dev/null +++ b/eval-results/race/0/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b32f26901674562a615197394eab3bf222b9bf4aa9ac1c1fa9a716a0188834 +size 2875 diff --git a/eval-results/race/0/ckpt_285/race.jsonl.tar.gz b/eval-results/race/0/ckpt_285/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7daf007a131180351f09e92186595e7a303e0b92 --- /dev/null +++ b/eval-results/race/0/ckpt_285/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed33b5f2482ac5be51e54e188ce75080a2794c8ae6003655d59d662b27520a1 +size 1343508 diff --git a/eval-results/race/0/ckpt_285/results.json.tar.gz b/eval-results/race/0/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36504478c102c8a26c84870723165ca5eaaa7afe --- /dev/null +++ b/eval-results/race/0/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6988a0996eddd499860cb14f103ede161e4366f65e79a44977ec87af0f5cad42 +size 2875 diff --git a/eval-results/race/0/ckpt_288/race.jsonl.tar.gz b/eval-results/race/0/ckpt_288/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b9db4215861e18f74f8f1809288d62e2f290a7e --- /dev/null +++ b/eval-results/race/0/ckpt_288/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdc9e8fd6bcd327fc31539591cf337162d22eb401e1d252bdc70da0db672dade +size 1343473 diff --git a/eval-results/race/0/ckpt_288/results.json.tar.gz b/eval-results/race/0/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1ab5c6ec07227e1d9726a36c37a9ff13d7f9a37 --- /dev/null +++ b/eval-results/race/0/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab12ca8431207df5468862cfa3d5fab1bb61b08622ede1400593f08b479b7554 +size 2875 diff --git a/eval-results/race/0/ckpt_291/race.jsonl.tar.gz b/eval-results/race/0/ckpt_291/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..274982b88fdcb406e33a7052cd18a9191ec81e33 --- /dev/null +++ b/eval-results/race/0/ckpt_291/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:179e3ff51250308677497a5752cca1ba841ef8d5f2eb11a238e1b8c4cf923664 +size 1343492 diff --git a/eval-results/race/0/ckpt_291/results.json.tar.gz b/eval-results/race/0/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..92ab80f9ece4ea7c96f4fb42817ee72abb6987fb --- /dev/null +++ b/eval-results/race/0/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6262206d7f8c650478293f760801408028c6584e4812e82602f36711275684c4 +size 2874 diff --git a/eval-results/race/0/ckpt_294/race.jsonl.tar.gz b/eval-results/race/0/ckpt_294/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4fc5dfed83c4c0b6e1f573519920ca390edb1e18 --- /dev/null +++ b/eval-results/race/0/ckpt_294/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85aaa932e5003d4743ca19507621ed02c302633c832c5c87340fb6771c760b94 +size 1343451 diff --git a/eval-results/race/0/ckpt_294/results.json.tar.gz b/eval-results/race/0/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb9781139329241bc0097830e42dc6f8736fa55b --- /dev/null +++ b/eval-results/race/0/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6930dba2f75920a6dee870e0cda63168b4b8ae70cade74db9946ac9b85ce6d5 +size 2876 diff --git a/eval-results/race/0/ckpt_297/race.jsonl.tar.gz b/eval-results/race/0/ckpt_297/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2925f9f4823465ca84337e5127ccc6fbd1bc5945 --- /dev/null +++ b/eval-results/race/0/ckpt_297/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406a9dbf9892516619a8c7181d55147b50636fad13a698dde4cf58efaac33ce9 +size 1343471 diff --git a/eval-results/race/0/ckpt_297/results.json.tar.gz b/eval-results/race/0/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f45320363899b47ad8cb18981fb79641086341c --- /dev/null +++ b/eval-results/race/0/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9536027ad7d9b312d58ba88e107c90e7c7b90266796225e2623b9cc100ebbd9f +size 2906 diff --git a/eval-results/race/0/ckpt_300/race.jsonl.tar.gz b/eval-results/race/0/ckpt_300/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..657e6689dfc338ca8058e2cb520a9f5d69c8553e --- /dev/null +++ b/eval-results/race/0/ckpt_300/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41ee764708da0bb8fa9ee3b1f2b439016a460ce93ca69cd8b27546f0c0f3b51 +size 1343548 diff --git a/eval-results/race/0/ckpt_300/results.json.tar.gz b/eval-results/race/0/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a131b6f334d731652c78a23fcccec60ef6170f58 --- /dev/null +++ b/eval-results/race/0/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a798b6a4fef98ba48dcd0e75a6d5be9fb3919a76e6dd10b630c359134c7116e +size 2874 diff --git a/eval-results/race/0/ckpt_303/race.jsonl.tar.gz b/eval-results/race/0/ckpt_303/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53f3a44f7886eb2dacb04840a96aaa8269a789ea --- /dev/null +++ b/eval-results/race/0/ckpt_303/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba404f7227656124f3af69423a7372af98aa595786fb8a6878a9f835c278721 +size 1343481 diff --git a/eval-results/race/0/ckpt_303/results.json.tar.gz b/eval-results/race/0/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..76a3789a89b5e0f72ec86ad93c99647c78b0872b --- /dev/null +++ b/eval-results/race/0/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:871f20b75816378bcc4ebcd106ace89e62666fedb191c7c2c3d24ac8aa7fd47a +size 2877 diff --git a/eval-results/race/0/ckpt_306/race.jsonl.tar.gz b/eval-results/race/0/ckpt_306/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0c7f853716a8f86619209a42324fd712f4af6e7 --- /dev/null +++ b/eval-results/race/0/ckpt_306/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67f3aea5b4ecd20fcfe92129a816565268bd3b85f7263809e50c7a8bd1ebb68 +size 1343513 diff --git a/eval-results/race/0/ckpt_306/results.json.tar.gz b/eval-results/race/0/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8ce8fe86aeb50514fd6fae03dd2b20ed6cee05b --- /dev/null +++ b/eval-results/race/0/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e049d6e32c225cedba37b1e526c8d5c9647bd80acf4b337143fea58fd35f77be +size 2877 diff --git a/eval-results/race/0/ckpt_309/race.jsonl.tar.gz b/eval-results/race/0/ckpt_309/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c82ec273543e8874a2b41948e078c5819a2ec3a --- /dev/null +++ b/eval-results/race/0/ckpt_309/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22fa03a54cb27c3633a6086d421680e8a9a95d5afdd3e3f8e189b7488b79867b +size 1343524 diff --git a/eval-results/race/0/ckpt_309/results.json.tar.gz b/eval-results/race/0/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a738c4463d2ac24e6eb2166d5acd7e24088fde83 --- /dev/null +++ b/eval-results/race/0/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982e4349c410d8bc43c9c9aea630cd44b573e7cdab4c8ffb0fd51778db0ccef6 +size 2873 diff --git a/eval-results/race/0/ckpt_312/race.jsonl.tar.gz b/eval-results/race/0/ckpt_312/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db294933a5770dda8dbc501be7f3e018ecf96e40 --- /dev/null +++ b/eval-results/race/0/ckpt_312/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc18f448e84690130e4141ef127b83965882d856d677943c63d94d329339e58 +size 1343403 diff --git a/eval-results/race/0/ckpt_312/results.json.tar.gz b/eval-results/race/0/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a096275ab663f533d9ff8d1e48e0154926349f2b --- /dev/null +++ b/eval-results/race/0/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dbfd42809ded7cbacef20106ddfdd50baddb1c3ff761874ce322603a85a85b6 +size 2873 diff --git a/eval-results/race/0/ckpt_315/race.jsonl.tar.gz b/eval-results/race/0/ckpt_315/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f7bdc8161d9c45043049ef90df4a1334cd294b7e --- /dev/null +++ b/eval-results/race/0/ckpt_315/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:223024ac2423c9f70f2be087c4bf9b95ab776b0e35a4829267de591b51ba85a0 +size 1343493 diff --git a/eval-results/race/0/ckpt_315/results.json.tar.gz b/eval-results/race/0/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8dde67034d4886a6a80ceb49da62c449a0e009c6 --- /dev/null +++ b/eval-results/race/0/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d573d4e575f41dd9a2bacb2ce8b4223f36ea71a4a33c96e861b21daa18ea1fe +size 2865 diff --git a/eval-results/race/0/ckpt_318/race.jsonl.tar.gz b/eval-results/race/0/ckpt_318/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99ac5757bab5140bae566eb5b2dae22601480287 --- /dev/null +++ b/eval-results/race/0/ckpt_318/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1084d88eb37b68cb2e55b73b5697ffa37c358f09d97d8324996fcf5dde6634f5 +size 1343434 diff --git a/eval-results/race/0/ckpt_318/results.json.tar.gz b/eval-results/race/0/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e7bd53dcefa1d1be58d8974628a9dda7c7b9512 --- /dev/null +++ b/eval-results/race/0/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1289992555b7087a4e3367ea23f58f477663db510a515acca565bfabb5bdc89 +size 2878 diff --git a/eval-results/race/0/ckpt_321/race.jsonl.tar.gz b/eval-results/race/0/ckpt_321/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aca14253b2c59c5580b75ae156431793dac2f141 --- /dev/null +++ b/eval-results/race/0/ckpt_321/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4ed4fe1f7e1c6448bdce1bf389df9c86803829ba89d8a9015bf2656c3323e6 +size 1343424 diff --git a/eval-results/race/0/ckpt_321/results.json.tar.gz b/eval-results/race/0/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..855f9d2bc975fe78352aaf24f9fbeccd8b521047 --- /dev/null +++ b/eval-results/race/0/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5656acff86aa511c1dbc192d0959d0920caeacac6acab22f3b0db554ea1b2aa +size 2880 diff --git a/eval-results/race/0/ckpt_324/race.jsonl.tar.gz b/eval-results/race/0/ckpt_324/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc1ba99d5b9ade54176d39dfe3ef6e9a218d869a --- /dev/null +++ b/eval-results/race/0/ckpt_324/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83d7b5e06ebd17afb356419a881c0aa305bf1bb60bc1c0d21ba5ca1585452486 +size 1343524 diff --git a/eval-results/race/0/ckpt_324/results.json.tar.gz b/eval-results/race/0/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6628938a4cd5d32483c58e68869ec38e6a6f690f --- /dev/null +++ b/eval-results/race/0/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4adb7a0254feb26685ac8c715b3eb1b687bddeb6321232761b000611ad34c6 +size 2877 diff --git a/eval-results/race/0/ckpt_327/race.jsonl.tar.gz b/eval-results/race/0/ckpt_327/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a563a13ef3348e66f0e29aa678999dc9bcceba6c --- /dev/null +++ b/eval-results/race/0/ckpt_327/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c8bfd21bdf9708c6ff7cde85844609a881f126d429fc3a631cf8d018f364ba9 +size 1343607 diff --git a/eval-results/race/0/ckpt_327/results.json.tar.gz b/eval-results/race/0/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e737c70d87b6dd29c7e4353dfe6335d9c248847c --- /dev/null +++ b/eval-results/race/0/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3c0ae95b442c6765c2f335e4993b7b5a90c87ae37378b7e949f2238b6dc9184 +size 2878 diff --git a/eval-results/race/0/ckpt_330/race.jsonl.tar.gz b/eval-results/race/0/ckpt_330/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31b9325d1fd776a04ffdfa875976ac4cd5b3d0be --- /dev/null +++ b/eval-results/race/0/ckpt_330/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7629748e81ff900b3d3bcf9c8b7d27a1e0a650d90fd23ffe47c9d726096f545a +size 1343523 diff --git a/eval-results/race/0/ckpt_330/results.json.tar.gz b/eval-results/race/0/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c3e6dfa45528ff04e87d5e28c3e69a13e9e0cd8 --- /dev/null +++ b/eval-results/race/0/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:160abf6d4357b3fcc6aeaf76a30a0d067ec4b1f2a81ea907ef8524f6bb6f5a46 +size 2877 diff --git a/eval-results/race/0/ckpt_333/race.jsonl.tar.gz b/eval-results/race/0/ckpt_333/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffeeb7714dcb89f39c6bcafd5926dde243c1d9d6 --- /dev/null +++ b/eval-results/race/0/ckpt_333/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4509773234d6036b99253b4cde8d81c76c9a2d21aa1465ac53a3768494c09d92 +size 1343479 diff --git a/eval-results/race/0/ckpt_333/results.json.tar.gz b/eval-results/race/0/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10bf12a4d354a7fa97c6f51f11595ac149b960f3 --- /dev/null +++ b/eval-results/race/0/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd19567d0cb55206300c365358988fedd8aeb18c37e6a147a6081bb6cc0ec9b9 +size 2877 diff --git a/eval-results/race/0/ckpt_336/race.jsonl.tar.gz b/eval-results/race/0/ckpt_336/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03893fa3e216d3af0b83a776236fa6c4461d5fff --- /dev/null +++ b/eval-results/race/0/ckpt_336/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22116e95258a92b2578cb7cd606117a091faa74b28f4e2372cee07e95e227df2 +size 1343443 diff --git a/eval-results/race/0/ckpt_336/results.json.tar.gz b/eval-results/race/0/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..41378561c93621835ef06ab879544f4ce9724585 --- /dev/null +++ b/eval-results/race/0/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6be71744f989ee2c8a2e43820b6c2ed79b4ecda5dcca2cdb8438eb14b0d88d +size 2877 diff --git a/eval-results/race/0/ckpt_339/race.jsonl.tar.gz b/eval-results/race/0/ckpt_339/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c2b01982ea82f16ddbdd9f14a8363f6f97966a4 --- /dev/null +++ b/eval-results/race/0/ckpt_339/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f439194d278fad05e2465714487d1eb131aaf8028a6c8424ecae3ee09bdf3aa0 +size 1343429 diff --git a/eval-results/race/0/ckpt_339/results.json.tar.gz b/eval-results/race/0/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b22e9cf44be19dc543c9cdf7f168f996f5b09621 --- /dev/null +++ b/eval-results/race/0/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58d28de8f5e98d86269d237c29257a47d8d4c86cb37506b9d7e40cffbf1d7c3 +size 2878 diff --git a/eval-results/race/0/ckpt_342/race.jsonl.tar.gz b/eval-results/race/0/ckpt_342/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f566d935fa293bc1945a607bdff0f510957d85b --- /dev/null +++ b/eval-results/race/0/ckpt_342/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd37a0c4edd232b1b33af825553ef2b1b1a9d68e7f61a059721ce03698e658b +size 1343594 diff --git a/eval-results/race/0/ckpt_342/results.json.tar.gz b/eval-results/race/0/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8628ee1cb7cc894b7aa9907e03feefd9563fe693 --- /dev/null +++ b/eval-results/race/0/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56855d25f9ba3e2e79c0a05f2c4ae70e44b44bfb989a35136f20adcd62b457ff +size 2872 diff --git a/eval-results/race/0/ckpt_345/race.jsonl.tar.gz b/eval-results/race/0/ckpt_345/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a86ac34b67bb71445a031a303e3682128bc96c9d --- /dev/null +++ b/eval-results/race/0/ckpt_345/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04735bd03cd944e3bcfdf70e40fbef72a18eef340080fd90065369417573cf8 +size 1343540 diff --git a/eval-results/race/0/ckpt_345/results.json.tar.gz b/eval-results/race/0/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..332d0f14cc3d4fb2e19aa98b22e2a039745fca16 --- /dev/null +++ b/eval-results/race/0/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95db8e38d29981961388b499b1c10a314b83dd5c89f7296c546b2cd9f0ae2342 +size 2879 diff --git a/eval-results/race/0/ckpt_348/race.jsonl.tar.gz b/eval-results/race/0/ckpt_348/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..169dd3a008285fffa2fc6d7abc75cf86585e70bd --- /dev/null +++ b/eval-results/race/0/ckpt_348/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b584fb198092f284ca22eff040b6a82c90af591327917c9b437e33b411e590ce +size 1343425 diff --git a/eval-results/race/0/ckpt_348/results.json.tar.gz b/eval-results/race/0/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2e051ac729ad176536f28b690b016b21beaf818 --- /dev/null +++ b/eval-results/race/0/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0513813e099ffbaf8762d84fca7203f73572d54f4035e715435f3cebf6316dc3 +size 2878 diff --git a/eval-results/race/0/ckpt_351/race.jsonl.tar.gz b/eval-results/race/0/ckpt_351/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c179a27919fefce5418144df4fc144fffc82ead --- /dev/null +++ b/eval-results/race/0/ckpt_351/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e2e4bba3f012a0faca9f90b01581ed49bf120848e29560d3d30b8add6eb51a +size 1343488 diff --git a/eval-results/race/0/ckpt_351/results.json.tar.gz b/eval-results/race/0/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb1945a4e9521d7f76df42520d09aa0e067615d8 --- /dev/null +++ b/eval-results/race/0/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7100e53d00fed7b929aa2d3dfc97d1046f3cd9148bc3e15d490077eeeb04cdc8 +size 2877 diff --git a/eval-results/race/0/ckpt_354/race.jsonl.tar.gz b/eval-results/race/0/ckpt_354/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e256c9e5f028443d3782e318650b04159a738d3 --- /dev/null +++ b/eval-results/race/0/ckpt_354/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aff1e572b303fec38a33db2adfc0af04dbe8a0ff119df55a366ff7244cddda3 +size 1343554 diff --git a/eval-results/race/0/ckpt_354/results.json.tar.gz b/eval-results/race/0/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8765275b044d1992146287861536400472f208ae --- /dev/null +++ b/eval-results/race/0/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e387cd1ad80a269444520d81b30bae1611aafdb0e7be1f3eefda7582d8329b17 +size 2878 diff --git a/eval-results/race/0/ckpt_357/race.jsonl.tar.gz b/eval-results/race/0/ckpt_357/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c7ba409bacbfa4d1fb9affca4f3cb2874fee74e --- /dev/null +++ b/eval-results/race/0/ckpt_357/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c868488d7075668b3c75d1cb5ee44e03f24376248bd4560dca206f6b79d25d77 +size 1343487 diff --git a/eval-results/race/0/ckpt_357/results.json.tar.gz b/eval-results/race/0/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff0a478e2de5e135d8e4423e11095014d178d70c --- /dev/null +++ b/eval-results/race/0/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:636606cd5946b2b16459f0d07391e6efed2c40b0c34cc3464e8fc0aa0233d259 +size 2876 diff --git a/eval-results/race/0/ckpt_360/race.jsonl.tar.gz b/eval-results/race/0/ckpt_360/race.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..23e3a9055ad9032d0321d11248f8e97b88b4e2ea --- /dev/null +++ b/eval-results/race/0/ckpt_360/race.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f48ad4375ee67c08e04506785cdbba31eead90c578d8abcc83fb2a7277ea45ef +size 1343540 diff --git a/eval-results/race/0/ckpt_360/results.json.tar.gz b/eval-results/race/0/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1ea40f206711e1d9bf37b7ab7bc6601dc679afb --- /dev/null +++ b/eval-results/race/0/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d8a45eeac78457bd0dab383a4384d46c11eaa1feafd3675b0b2e91b9ea48317 +size 2879 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_003/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_003/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..641b7578a214d1524bb833a5f4e5975dc565f2f5 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_003/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31a3ccac22c517f5d31d7fded69800dda665614071d1a05faf9e9965f4c4ae90 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_003/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_003/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04f55f17f824e14b36e018b8050a44fe1a8ea028 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_003/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d202042aebeb529990e14b8a594de6d0a8836768ca0b667326167adc83c6d8 +size 312739 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_006/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_006/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8c3d007aab7e06eedb60b9c7c0f3e7a1222f9d4 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_006/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aede7b19237dad793e89e75eab7773749569a8fd13623f21a3830fb6fbc9954 +size 3187 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_006/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_006/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..38cfc3c3a4eff931ff08bf6b1e69175b14ebe1f9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_006/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e237c5e435b6b59cdcb8259a834b2c8a30d8aa55eb7bd0777e524fc4145b78d8 +size 313085 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_009/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_009/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff48b05b4ce9d7cf3205bac0ad8016b0d625dfec --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_009/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a578e6562a976d291020b8764c16a248b698e90e5d16eb26e9c31aa30b18a9f +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_009/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_009/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f99902f3d3eeda7d71b82a6df077d7e8ebf623f9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_009/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ceb341cd458c0916c936a1352a7a09a9071c0762d3dfbb9417723e1c8a3403 +size 313117 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_012/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_012/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8574845c819ea0bec750cf71a9d79754f388b91 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_012/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7fca531af1b9a124f026d474180bd02cad8068f5d2776054bd760e44d49a9d +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_012/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_012/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0b5fcde63ab286809ad3635dbe37087c5b7ad54 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_012/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ac5269c6c043a75cafd61aa9fcc77fe1cfca9b8b7298b4de987d1fe8a52231 +size 312984 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_015/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_015/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ddf7ed9f484dccfbdd4300ec859e8e3b02fb7abd --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_015/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b998fbc7482b985a1fa18ad773b4abcd8418f9506e5b38bfa1a79b7026f47a +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_015/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_015/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b306f2fec4584500566fb27e55215ba7376147c9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_015/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd73672f7693301cfbd5115119c932797dbda5d9f66c46dc141e9a708bf59c4d +size 313276 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_018/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_018/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26b75716b28809e74a196e18500ff3947053e71a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_018/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed61b7d01673cb50a02ca163d79f44193a94b1104145e8da0837614977929bff +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_018/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_018/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0cf9898cb414eeb86dbf3cc85dda2677daedf5d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_018/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f29625a24ce8657b874dcbb580398eb1c466c786f46605414cdbcff3d945965 +size 313143 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_021/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_021/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77964daab21e2584ece2ea1b1e44524b208cd6aa --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_021/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a36373cd1727da1f4269dbadbe210d55b5d26adaf144d3562c55609e8daf96 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_021/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_021/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3fb1b5333fddaee1323fbf083f4b99e41cfbb99 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_021/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48078cadceb29e6878b6ec8c704f0d55132ce7f876ab054f4c617260360de0bb +size 313371 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_024/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_024/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f4fb54ff60b9fb4616233bcaed000fbbce7a002 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_024/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384daf02b8e5eead558e750b4667237129ade6c737e23b5e513520ed5d73ea7d +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_024/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_024/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..900d7166894e3740f61ab651be1fa53f59fea613 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_024/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd25808e444b0270398e848d8db56be33a8eadaa5beacd8be047a492b806f794 +size 313386 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_027/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_027/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dbad83ad397b0411576fb0e3059bf2c70301db55 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_027/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d98a811498ad201443bc6d9ec572beb61d38c81bd62f1dddf76569419c570de +size 3186 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_027/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_027/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..779ecb38c6d4902020fe11e331e2a7153422e8b9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_027/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f93b7069d6e60eabb6ce08ce28d824b6cf748dd8395e572d11adeb40c09297 +size 313318 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_030/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_030/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e872bc9d175671b0793fc748e8a5cbc67ea3ff79 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_030/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee9aea1ccd3008b255a31bd348d8056ff2291a0189da10fc2794c38edf0217f +size 3218 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_030/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_030/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4442c12a652d3c438ac5ab75aeee75559b0b9714 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_030/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a497994886b7e472a86563d8bb0cbe2e7a785c21bb317a23a85a6c444319e62f +size 313395 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_033/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_033/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03f86276547e9cd3587b31f95cde46dc7ae8a5dc --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_033/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9482c2bc2c8440c46ab3e77f87bc56baee95b21b87870896c218293ce0b9c97c +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_033/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_033/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0264f2adbc9cb2f0c082898068df7a3ef057a9df --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_033/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2776fdcf867d4410dd3c86f5d0bad787045293ede659002fe7a62a9e7f607ce +size 313427 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_036/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_036/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..427216eec66c882ea2b92b49e59f1d1cd7250d13 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_036/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338668ab857fbb4eee3fa04b54953bfa2eee02ea50a8803a751e5026099f7840 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_036/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_036/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10d22e58f23eae755facdd5b73199b778c90dc06 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_036/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75aa2f8c12a295d9fcb6229c7c2ce9518db73c37b60d09f2024e2adb1f623875 +size 313434 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_039/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_039/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..90945ce6119fc7b5de3a66ded687363b33a7c966 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_039/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:561cfe6fe29b693ab599d694ea9bb01930fd5980b7f8d37b277feb5574d9114a +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_039/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_039/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94ffebaeabdef7c9110e89da856c97b21c167fe3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_039/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b465e7b7c34f779a9b90af6667c89f3e1f79ba02d16afce28be38e2bf0fa644b +size 313249 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_042/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_042/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c5d60a16f0f73d45a67f036cc3260a525bbf478 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_042/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5f469773d82d66775b84dc6f5904b85b376274cc804dd1b8d97697109cec66 +size 3194 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_042/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_042/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0dc027b7296724079c8128a81ed2205cea877b1a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_042/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a120349a6e89bae75544c8a193b054514d0257dd829e61f0d45964b81a202497 +size 313310 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_045/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_045/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..616e880f39fbdb84c11774a9714dfcb6906e3d3a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_045/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20050e2bc59b512f64596e37bc83791b34998faa664c98450ff23f80e3de8ccd +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_045/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_045/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c18c3b60755bfd6bd50e8b81c69da79ecda5ec3c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_045/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a479c397376f2c2abf60614a07fc6abafabcca9349868e24d425ad5dcdd6f46 +size 313222 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_048/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_048/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09fa5ecaccfe7ce8650f1a0be77084447878c836 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_048/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b0deb260b38a1c23cd3407a0d814c273780a13ff37ce4dd91e42cbdd58902d +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_048/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_048/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..43bae5bf532a3ec67f68e6ec54a826a194d5f42a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_048/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:660960cc72bd3a059bdd590e037d503d82dbb1a00d6e3a4b3a51d4f72b399a0b +size 313470 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_051/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_051/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..564286528cc72e9db286540dc1beae79f97375f9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_051/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0184dd9ebdc2717e49b7c7e97d0b4e230313e734558088b35d21acb9c306c4 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_051/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_051/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26fd83e449b637e948d17b94c4fcd17ac73614f3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_051/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8da968040b96ca909f78918bc5d81325905c46c3192db2341c2f4d01fa6b8b6 +size 313415 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_054/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_054/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1ae674b3c13997af106bb279f5b471227c64216c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_054/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e8fb5c56f0408f346f8c137539f74644eb2557fc54f413f5ac040f8283e144d +size 3219 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_054/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_054/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03c0191a7a577086207a80faa69cd3f9b1ba45d2 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_054/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9ea29a1e5a5c4c6b603cec98f8c3ff82cfc1250ad57f1ec7d7898a56a7ac86d +size 313372 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_057/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_057/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b44d86b7532df70bac33f70f64e5e63cce6b600 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_057/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e55f5f826586c177f98287044d239f84828cc21f468149dbd7ffba8563d8091b +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_057/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_057/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ebd2e956f8e6edab3ac25e03aa72d3d522a31459 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_057/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6306ad9bb8bc0de054092d1af08dc17bcc0f6d7d50bfeb93bc2949f26289ea83 +size 313526 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_060/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_060/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d845ee637d0bb4294f0d17b5e83daf0f7c1e703 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_060/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d52d3cf604b8b91cc9b8144209942a262c3badd1464c942b1b7b13e118f5e1 +size 3220 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_060/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_060/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fb57fcf933a775a2448d62c664cf0288cf2378ca --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_060/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6384bcaadc1041f70b5451789f78a14201dc15b6df1fc1a461fd52d449684548 +size 313466 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_063/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_063/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a85f6a6891568682e6d6f7e52066abc73fc6a335 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_063/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604d14a2c87d7c4b89322383b98ccf40e167950e99454c14970ad7acffd53b38 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_063/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_063/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..45ee6195afc397d1ccf2b1e44041aebee36efc85 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_063/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c9614dd3d5712b9c61ad6fbc434c56b46deee6413215c88f57027ad25e61824 +size 313284 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_066/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_066/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bd1169e80d6fb6da28e3027887a1f919b712d44 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_066/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5e1c604e1abd9852d8d8f9cc5018e91a72a4bb15f0633348593fbcf424939f +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_066/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_066/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7dd15b2cf9a0d339acbb241029c4abd798b47282 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_066/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9412a712e99a6025e46af53759ba08f4057623dfc3f4f7b3f6e2ef56ccef1dbc +size 313293 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_069/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_069/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..34451a47bbacead33448cdf0c8d8bf972be3073f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_069/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ecd9864cf048c92123a464756f606cfa92bdc2132c5dddd309ddf9845e03d0 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_069/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_069/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..048bcc57929a63e8c70856ac00e542849e0e42d6 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_069/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7aba30e84362f7c11ea126d5f2ee210241dd0a1369a0eb1f295e6c1a74d6b69 +size 313225 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_072/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_072/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d761dab2686c0576ab66d0cadc0765e2012da47 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_072/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d974432672c7c35a00239121eab02cccae04118156b78b6ed94dc7d589c46da +size 3185 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_072/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_072/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..81dc231e2d21bc08a6686b2d2ebf4ff5d7cfc7e0 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_072/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c35b554547b7de90e4d29cf2c21942b9bdd1c6a9a75093cb42985f14ecc66ff +size 313418 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_075/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_075/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4a0390488d87f61b4251f39dd8ead3bc607a0817 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_075/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f66cfb0cb6e2d5b13e9ee8690eeb6f2c1d65011321f35dadedf27d523694fc15 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_075/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_075/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fcbc70b076002ea390061840efa5c64723d8884c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_075/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f3726ffa8307c26fba0ca074ba15e8e6903f26a9c913118c5fb372348413eb +size 313608 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_078/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_078/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b453364456f18419e8aa9b6ad7e418c544f1ce13 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_078/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12c2164d3dd3bf8fdae052d745dea5781431d0b87e8ce8568593acc5f939f7b9 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_078/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_078/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dcc5d5d434388204bfb4789b3ff239dfb6cfd73b --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_078/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c24591a32b20e8546955bc5bee5f11bcde4bf6b8c9e26eb7cd6abcdf6725b6eb +size 313301 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_081/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_081/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e59b85894a38fbd48d9c68f0cdf140c9b1530e5 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_081/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:426dc5622a772017b58cf1f9ae95cd5edef576c9e60620648b7df60a6b9f2532 +size 3186 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_081/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_081/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ccb63ebe959e4d57b8514d6e36a1705612ef6524 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_081/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bde5d4e09e3bac422185b4610487d74de3a034d31559c7413f58dfb74ffeb4 +size 313545 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_084/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_084/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4295eadc1d6c781d91e45c5897887060fa533f73 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_084/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2160d9ad6aeb4bbb3e31d89bd8021c84bf6225e05bf61099a09aaa99754e6901 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_084/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_084/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67d0f3bd94cdf31385ee8067a39f80243e83ecfa --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_084/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4789e46166ac496f398f23ce24f9295e2fa95710f336b0834e8084afcb95248d +size 313621 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_087/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_087/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ea14996cb815455151e9f82707d398f0b6ae4050 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_087/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ab8bbd89ef53ad810ca0b6d50e2b48127ddf8cd03e8969f0de6a3433ccdae9 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_087/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_087/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1899b528e15394337fc893ab5b1114f7858ddb5f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_087/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb8de78ee6b911d1ffd6c23400c8fa3f421fe75e6d329cecaf0e45a2b3c6884 +size 313347 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_090/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_090/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..71614a624c54e374973e49e00bd31192d5ade635 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_090/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c22b52432bf9dbe540b51a43c9c4adba251ac5302023b42093d5e2de31223b8 +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_090/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_090/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b887c93adf5143e98c97ada1230b31f9eb5dd7c1 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_090/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58028ef63eb7e0dab2d4c967a818f5e39459466eaef130d9b28e00084ae0f6ab +size 313513 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_093/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_093/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5f3ad60c3b49ac653e39fc24a6f841fa8181126 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_093/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d273ae3544d2362c53f653b5e189996c2cae510febf446bed7745d96ce3d2db2 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_093/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_093/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cb0a6ddd34ead25702721682d459a022b930b06 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_093/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b0cba72f28fc3a515c91e8eac5d3150adef2a7f7dd0aca6416dad24e1c8e0e4 +size 313421 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_096/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_096/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4433994d08faa94ca3d544c120c3d3079721f756 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_096/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5206d88bca1040645ef47d223b6caab1f9e10d28819f2724fc1446802d68dc +size 3186 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_096/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_096/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1e1610124f0ae1f460e07e3c262011fba4f7f053 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_096/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095cef4124acc41be30daebcad7d08ac2e791ae0677dada65e310152ca8ea89b +size 313600 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_099/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_099/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9618545ab3e5c5b95804ea008286ce1546eb37c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_099/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d241af4273eabe405d77bcda1518f8ae6b82bf3b8c1557e0ee90dcac6e5f8ee +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_099/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_099/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2986bf1780f259d9a954d751cab13c70a52c8e9e --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_099/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f73a5ea30e414d934d1646659b3c120ae895002d54ba13e83b72f691fa9468 +size 313439 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_102/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_102/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f300cd383701d9ce032c76ebde1e3aefa995298e --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_102/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c877668222f5d9ece6f4f01165108f7e028085501cf06457ee0735e126e1feff +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_102/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_102/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4ca52ea3c6f929f728aacbcac670af5bbc84fdc --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_102/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff793bdf8fc4a311f2c7143c667dd9f9d36df03840ea899ce60b99c9813ce8e9 +size 313424 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_105/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_105/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..498704a98a129c7e42df762c9da7ab782f7590b8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_105/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb05267c59f63e6737ecf5cbb2b3d6e13c48c6af44ca336d2da8c25fea0c9f9 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_105/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_105/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..623f8f48f8c2fb5579b8ee28c0cb45ef23f6650f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_105/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f878521b50f6dddb886005c5018f50f3fd8d9975866c62dfcee7af60cd369da5 +size 313337 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_108/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_108/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0e00348b325788c437d8b12c54ee7f8a56096a2 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_108/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61961d3fd63017329c45bba2841c6478c0f693cc14003556f56a1032349021e8 +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_108/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_108/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c7d75e5559cc6bf975ec112364dc6f0d793e48c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_108/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea74c00fd4261db18216df8174065501796a29db4e6fffd790874a7ffebd4b67 +size 313264 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_111/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_111/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fdc3510f0384733ab239d142c681c3936378535 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_111/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c641c94a4dc51166febfa220e4e1a0756608835e6ec3fda912f6fe29a5a2a633 +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_111/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_111/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..62da7ad5f0edc08d025dd7916fb7ceb0738cdeb8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_111/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66172b0f3931fed847ad0ce0b2480c1261490fce2a08139c8c893150cd14b7f0 +size 313425 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_114/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_114/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee4dabd7c5eff5c9b65cd1e2024b2eb932e5ddea --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_114/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31090ce1bdb94aa74495b9c60e4539a6b560b1de652fd7ee0c61a0af67612846 +size 3185 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_114/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_114/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1d923a027504e0974a960e0a0d42f18e56b54858 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_114/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb762c668d71ce1bd7bf31a397888ffe185bdc397d677a4cfd9a4d26ba5d6a2 +size 313403 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_117/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_117/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..65c6ed63ee697fc9823cca129ab1fb6931256a68 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_117/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473d1f3fc1e73c9b6ff64389c2923c6ca6f68863de835f166230e00c57e67ece +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_117/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_117/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d20bae7abfabd518777a5b7bfe75bb7313950ca --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_117/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed065dc36586f3e16f15cce4de0b026382f95081347b76656b4d4e9dd51a1023 +size 313425 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_120/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_120/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa0c7d1a88790392050a02d5f8f7263e5c25df02 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_120/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ff15e4b8cd8ec74e4346571175b566f2fe317ec25e6a3df5329a0b8acd5319 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_120/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_120/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..392051437d41fed27c20a9a82eb60ee0d546cf0d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_120/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:252995a76821a2b86751d13d64d9ec359b55ba24ff2fe4d6aae66c333053f3c8 +size 313425 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_123/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_123/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d7ed4c4352081cdfeeca160ece4787a96f78903a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_123/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbbfcf55a641f591543b4ce034ae9fb9c988bc32d1e776b744d1e22decd9574a +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_123/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_123/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d46555f5ff0aada3f02a7e609870ae9bbbab99bd --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_123/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b3f2fbaf7426fb810b51891994b7047195e87d6f243ebba8144e197aca5aba +size 313358 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_126/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_126/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..df56dc70d0f19289c06535b53ff894d8b04c40f1 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_126/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbbca0cdf42abdb31c2388eeccf44c3e499402c0b15c9eeb0deac47e894441e +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_126/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_126/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..956a31a5da6048337da24e0c4a398e1a4cec9fdf --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_126/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc7a08b596f9cf857afae4cfa173f1c37f8a61eb6b5afaf8d7e645372e625946 +size 313366 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_129/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_129/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0bbce0040b31d2b139311cd6bb660853e002a12 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_129/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3acd1609eae6e8e75704d1c0694f5eef1f16deda0f26b05e9f7d1398355b36f6 +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_129/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_129/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..99b3d3a9d3b5e4708e33629b464b738f6ce68037 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_129/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbea824cd0a2976e8dcb6849b7d44d5a8b2919f11d2c37c679c32992fbf86193 +size 313250 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_132/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_132/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a09e2777ba825c6a88a2647988f78f447149dbf --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_132/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:732a183d51b08fc1ae008c8b9353d5ffeae53186a6ffc1d65ca44a39a83dd3fa +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_132/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_132/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2628ce493224a0a9c7b5bd30e71f7448bcca5991 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_132/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15188e767ae5e17d884eebfd8c1045c8fe9f989d405accaefa0e2a97db874988 +size 313500 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_135/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_135/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..63225b25648c492542b577ca55cd106ebc15795a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_135/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c175283ae5e8127c140fbd601fec9cf993d28e489b6f61a08410ab85668ca4f7 +size 3222 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_135/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_135/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54aa746529fc2c3a7f0f32b89b7455214e7c2f4a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_135/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:963092c318393195e7c6d69c19745010ec8be4919786348689f628e0a00f15ac +size 313566 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_138/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_138/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..50f2a9b012167c49a58a53d886198498ec28bf95 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_138/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178e423ed96981d9bc743e021c60b1d179e01ed3f0c7363a82d969d385e9be6f +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_138/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_138/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f0d96356fda93eb4b77481dcde419b93615243e --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_138/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ae365c426431979bef53ec086ef4f80faafda2f40e188c53c78638ffe7b135b +size 313300 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_141/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_141/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22a07afccd898d5807ad4b93804d0f9235657674 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_141/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b649d6656de0753e57b714b4033637b207d6c25f169d5624576f73f5557d020e +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_141/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_141/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d6a3b6b90c1090b9578dc0f5fa84d81808c76c6 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_141/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:361d0339adb89fe8882902493838f485add2970e352798686ba7808af47f265a +size 313448 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_144/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_144/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5c5f8e6c6f0e7ffbad45a11e9d284a792eefe8be --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_144/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10624a267bbdf3450136a4586fdfb18dcbcf118f05c385a680f16929b2ef6f58 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_144/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_144/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dde48f365516eeb4bfa8644de5c2a1d8d95c1433 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_144/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3a04451cea89c29659818c4bb25cbd41fbc1186b23937f03ab987cd782f3dd +size 313659 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_147/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_147/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53416c22fdf403f45a0f89e2a4dca374a38ad962 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_147/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7846aaea4f95bff2b61b62c236e580ea1a1094a530d84bb64e20f1fdad44a3f3 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_147/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_147/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fe7b078034fd575dea020ed3f7f079af68426da --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_147/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2935081a036e417572c7fd267039e6f39394ef217ab2837d0d7bf3aa9871bda +size 313465 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_150/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_150/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fde27f33834983ae62a781bac03a7de18310c919 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_150/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3670255513f86599ae29a51b870180aedaca7346173fcf37b166525e5d5c4896 +size 3186 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_150/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_150/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f4add4e08dd2e79d769bccee9e5ffc8d014ea36 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_150/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d99744dde0d353c81782e34d5d35b3e13fd9cdbcfe65b2ab324156c11ba9ea60 +size 313523 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_153/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_153/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16347247ea4098db7cc3f5e72c43d6d691f37c4f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_153/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76b80ffbaaeca2f664d561248f15fce0be7008254f83602fd5ac5bfc068f8327 +size 3194 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_153/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_153/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3645b7f5ac25f250a4108ecb863d00cd60cb55f6 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_153/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99f8bab3e976a9cb012de32662ed15affa4f7cc7cf164d37c0e4b837e442b51c +size 313406 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_156/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_156/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5caf5e486c2a9b16551705756a72465707fb1123 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_156/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3caebcdaf7e55abec46cf685a95ed974cc83b4e47a64609b19b341ce4edc0b6 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_156/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_156/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4625ea0cc5f5187bb4fe2a43b7cedf5969884c3d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_156/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8194984b906fa35e742ce9686442d4091e8b239a9fca6da7c0bbe1dfbfd06f3 +size 313557 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_159/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_159/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..806e697230206049ab8c85c789a9fec00f3ebdeb --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_159/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6395f83c2f10c21aa70948e9ea4940f0c660d9abd7832a30ba76a2e232ad4f +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_159/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_159/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f05dd665644536cb98b3376cdd9a9727dde25130 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_159/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2baa68f3d63f3d8eb9217562155680bbeab75720f72e0a77445b7f8aeb19f288 +size 313585 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_162/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_162/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..260c49a03302fda6f83526e3b769f66b7234b5f3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_162/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3decd31deadf69f94d0f8b319ddbc45a77eadbe5bb1e6eced4524f84852b11de +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_162/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_162/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ec89b28e0de41e3e29ee07c23e3f3d2ee0db124 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_162/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b51494a1783ada24b2420934e2bec3321d7e1ea95463910039626049e7440f72 +size 313594 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_165/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_165/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..85b095639b05ee99016758056b64447c2b18ed35 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_165/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f721cc45cf4bb45e675a62903a19aff4f7c61452e5ea49bdaed7f066c058ac10 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_165/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_165/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c579e3d6892e1bc4bf329f6eae5d6572f5e2cd8a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_165/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d167a2f6eb3248fea78f384e854aae2639e39cfa5e96c88dc5877834aa3b3af +size 313499 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_168/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_168/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc59d9bea4c053b39274307aba72f402889e2aaf --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_168/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947f5e59b782c060db4df18ab977bf3323670ed73ca1b54d055b0ffacd824dbd +size 3219 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_168/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_168/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..18a24f0f9e47606a650ed5d0509c8b10a849a1fd --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_168/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4d70dc6f9d41232a4d0445547b1e82172c2ee672fc527d84ec608319470bd0 +size 313540 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_171/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_171/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5ec27251a384dcb3f22cc45ff9dc957a3b6fc51 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_171/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aaafe4c7c00aed58d360e4c0643ec7af28c58d217ec8ff22a42b7c107c8a70c +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_171/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_171/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaff433e537cc8420cdf6ddfc9e50f83e4f135f6 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_171/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96edf3fb1c1a49d624982a9d6bf55410e7810fbf541e032c774e38abfd81294 +size 313510 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_174/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_174/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3121741ff38dd1fe50082c587fdecf57b179bec3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_174/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67377031c5860e3585c3c6632490eb762ddc79eda82b9060a31f2f9232668eb2 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_174/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_174/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36180accb3cb405f11f07f25ddf0a001779f3f1c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_174/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa430a88d8c81bd487acb4347ced36e00bddc96ebc8264ecaa320efd7923f15b +size 313330 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_177/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_177/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e13f40213c28bd301910c8729bd0d916b765471 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_177/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e37b08aa8d57ae80eb4ce476eaf22d446038c5af440c7382bcb7aa9e460d83 +size 3217 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_177/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_177/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ec6f7aa86e06f0a96174b8af92afd1517d12c868 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_177/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:656d6544f6531747514c9dbec3c0ac94fa51c59ec8f772df06b41775aac8756f +size 313537 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_180/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_180/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ef4b658465a3a695fd2393193f125f22a368ed2 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_180/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c2b34fe7e8d412e8181943181ba58195b0b737d32ac5692a861b8a52f7a46a +size 3221 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_180/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_180/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7867e8ea47eff052183b25799db442138bd8632a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_180/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d179ad7a74664e94baff5489afb1f9d07ecbc81220ca64191eba9044519bfc +size 313555 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_183/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_183/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15f69298a7f50b8bcd6ff00cf7f0eaadd3bb5d11 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_183/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec0c6d17e7863f92e4edc7f89bc37d5fcd9cedf624e69345b06d4f633f95247 +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_183/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_183/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1eedbbb0e2686f25c6633647e43b254424748ea7 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_183/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5872cf2bb97de286e2d85fd070d38ffd3881dfe999a899b41fdcd8493fa5100 +size 313589 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_186/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_186/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2828189d8692e5a5c1592be46b01bc8777cfe1f9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_186/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699ad2d34dae0083d2fc66d1da1e1cb20f1fb111c68f667b517792b8304acf02 +size 3220 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_186/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_186/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5c02107043573d74014f063d5bd2f0a3144f218 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_186/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc864f932630149cd6e33799c365e299931eed616cbf38a51ed3fe829c1b71a +size 313416 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_189/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_189/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..803a021b2e82a84da63fef0855126870a360192f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_189/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b0d4fa517c4915641715cd79b660596c71a261f110b50fcadea3719ad91e072 +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_189/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_189/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..325fa5efc8c6d6ae0e0beedb9e869f2cb69c475b --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_189/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b264fe62cc021919ba32607cb8b7c702a1097812b1ec4e1c10338fcef5122672 +size 313396 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_192/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_192/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0eeb0d56473e87b3491162ebebb85d2559e42217 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_192/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a2571b7464dc8363c6b959d6367e45e4ed8d3805ecfe166059b192dd08b142 +size 3194 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_192/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_192/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f765ab927f89b09f94e46f8d2232c0a93e9bebf --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_192/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e34a2840d047923b54827d265e7eaab9e7e9c26d69559f9ccaad628a3378472 +size 313582 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_195/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_195/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e21c49e82c9e8a935a3ae254c24328f6185adc6e --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_195/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b168a12d8682b3dd8f5aa920f8e3775b65fe2de133085478cb3b4dc6fd14071b +size 3187 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_195/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_195/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bbd08156f567961ff401b9e60686094bb94a7e5e --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_195/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a693f3c586621f467ca90fee7611533297d23ff654b7a65dcb480d856f58da9 +size 313341 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_198/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_198/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..db64d3840c2d4d7578d1b62ea2cb479c8bc57322 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_198/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:784e0d71b2eba631524648d362e38613649d025fbd8427df91453e80ecad2382 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_198/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_198/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffab716c97e96529486c3841f46bfa192776dd3b --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_198/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32466ee332b7727a5e9788ad17ec127db70c831338d9e8350615995487ead28f +size 313538 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_201/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_201/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cedf8225f5e254e8335e3c5b8a161c0f4b220e6d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_201/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da8e3032959d207f753cc8972b9e20fc4f2fa5ecfa2698fa3a9921bb03096e9 +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_201/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_201/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2b62ce6cc39c6f7a39600de9ccc0badb3cc642fc --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_201/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65f495be66c9b80412949d6f78e4f1a0b236d05d0233c80a8ea491afc243f1a1 +size 313447 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_204/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_204/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..69e338b113c461bf9b17339da8515ff6e868020e --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_204/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56de8e6ac8608a452e3beaee85f3d70384056452e7a3d8ef9a447ba1f191ea9d +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_204/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_204/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bff8369866ed3a3d417f4c92c642e37c4e4c52dc --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_204/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b96e3d4d59b82e1311f4fc22ef10f06cf726fbe06aa77b9acac8e24869b2c4b6 +size 313460 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_207/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_207/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c50ebb506834fd10d9e23f35d495a4bfdc9174c8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_207/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a872c5fd541ba18e0b586a73d5d2e654368c11771a399c880c5bebcdb218cc +size 3187 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_207/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_207/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c71db5fe8acee6a8fd9382aa2ad2d2993deed38f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_207/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db3ce27d047c748386038c0886cd6a8c59cf3a5d9ff99d976e2d5d1936efc139 +size 313419 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_210/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_210/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dd13899863002b4efe04e5bcc617f05df84f1e8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_210/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf01d319179faf944734040a045968ed1440edbfa4b42250dd40e89efdffce5 +size 3217 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_210/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_210/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa2d6bf91eb6836234e7c4f6fa47de37a2845e06 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_210/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2a9678dea7b27009a51210a5a4f85faf0f426eb5e5221f71c29e96f3086b07 +size 313493 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_213/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_213/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19108999b45aea2f9b1677d36a295b25a9bb8e50 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_213/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa6d13001a0597970bac55e3c5438040f5b66a8caca79fe1475b97ffa22aa7a +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_213/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_213/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba945e414c791f2fa098e5c5bcaa89e0173da4d8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_213/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6249094f9472a6b51315e233cda9ef54bb7bd3cb98c6296f0968ce65873f0fca +size 313487 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_216/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_216/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abf92ecf392a190caa6015938a7f62377bc30178 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_216/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad69f03fa842eab28e302a79a1880b21ef67ad7cfe741bb7a214762f80a0914 +size 3219 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_216/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_216/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b355e12174eb8fd5aaa4513a3c58e08c9f10cc03 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_216/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e935b637b737998dc5e788a3819589f96b4bdffd2349faa5fb658bcaa9cb94ac +size 313589 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_219/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_219/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..abd472576de452fc11d5e7533c64f8040ac4b477 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_219/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e0e1a940f4bfcedd79f48d0508d65116933b538349631ffff814a0b5449e40 +size 3194 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_219/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_219/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..104468d631720f5e0c9e7d7300f99d38ce0b36bd --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_219/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85004ded320b84114106519532a47d947b270d00f64df1d605039b2eb5a5cb4 +size 313393 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_222/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_222/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82143522df79aca895bfc8a91eac2d56c826a420 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_222/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89846157a71f99659d6b5cc26fca6f69fbddb6f8fe80ab22731d660286ca49d2 +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_222/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_222/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..10d5b5966512aaed8af7f623ee1a3540ec94f607 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_222/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33131f96767f6749eeee0146a73d538384a651eb3d3c40b5e7122c97028cc085 +size 313393 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_225/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_225/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..14f1388a8e808a821471e2820ad5245f058b0cdb --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_225/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85cd312b82163b295f0d4ae9638d4b1b292951b86895b94282788573bf03284 +size 3187 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_225/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_225/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4bd9d79fc93d67bebe108e075dd02c8f5af00413 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_225/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42bbb63ee91319c1e606379bd50864e467655f201e6d3f227fbf59bca0de581 +size 313363 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_228/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_228/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78f8c92a91b004051268d280b66c3824ceff212c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_228/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec8fc3bf9edc14ddfaf78b0cfcf50c1c86d4c07f78c887f6cc922458e1fc69f +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_228/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_228/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..181b1c178b0832fff022a5b92c67e050daa42878 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_228/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7483f061d2f1fcd5bfe9dec025fafb15e28bfc9f8ad18a7af764abc5912ef6d +size 313217 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_231/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_231/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..968eff19bca4fafef61671b994ad186163223177 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_231/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0ba09e88c2dc72ae0c6c21741e8e8d7c44524d5595de2c2859687e36aea9e6 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_231/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_231/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..912d151127579a8f41ce35fbb4968c361db5d5a5 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_231/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ededa1abfbfc12471476f62d87e60f51d7b31d467dc02c02be3a778123f08d0c +size 313113 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_234/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_234/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..20d23c37f7f47078aaa6458caaeeda422a3f98bb --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_234/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6872f25e802da8242b4bfecf567c71a65d6a09adac2e4cd121e1a2d4fb6e6450 +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_234/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_234/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ef796d333db39450e2d32d21cf39951ca3bdb05 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_234/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceadb3a255643c4f9c7dc833c1d369de318ce7f094ee51a58d9235b8f1a2f1dd +size 313163 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_237/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_237/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..957b5e0afe7a4ceb6945a1188a68fac809c1b44d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_237/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6263473b1ad44e58259a3cafebe480321dbf979df0a98bb6abfe82984542b52b +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_237/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_237/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..104953edeb7bff1e9f922ebdf53ae407aa50c9ee --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_237/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12f3b6c7e0978de9a6d365b8e31c5020e195989ae9f6b4694a0cb100090e6ff +size 313327 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_240/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_240/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..663772e7af665b55058bad3464c562ba3ee9680c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_240/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f758c50d78b437dfd3d342a39f71b830ca7d8b8f33a96154788788ac255ed1b5 +size 3194 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_240/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_240/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9626d25c00503680491080f98348b6eec0615f50 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_240/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a73625f3806792870c02a8aaed2a3385cdacbfbcedd4cd496fb70cc872c0f6e +size 313359 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_243/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_243/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb1c0b124d2e7f14388f0e45dd233da1f0c0daa3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_243/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4c3cc624a425f1eef5bf3ab902699c92b747a815bb3bd62bb09a0c674fa9db +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_243/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_243/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c9f38790c59cea4673c4da663a76f689a99e3dd --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_243/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a55ff54d2490b89d94d4641ec5cd78c2fb9705f6d6850304d4330a502fe8c06f +size 313291 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_246/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_246/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..674be738e82afdc4165cd546f2747e218371b56c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_246/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22b3c27c8d238efbdea052918d453a53d45ca897a4cfb6ede298a4c9570ebfa +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_246/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_246/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fbb4060a0de666e54051e660b443f7951e380dd --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_246/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ca85740178f734bd77ab94d48324a51d531c71dbfb76dbc78d81bb0ca674b7 +size 313325 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_249/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_249/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b505ca16bd2c60cfcb55429e738b7d84da31e60 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_249/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f45a76558b5529aaf3c962b09a4b6c588d2f244dfec385d26a0a8d9f538babf1 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_249/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_249/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d60eaebc7c45044c0df6d6122c9685e8549c972 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_249/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3e83c2edca103fcd5f41fbad0b3184e31e9737dcdffc6b1c8a36d9b7f9abcf +size 313288 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_252/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_252/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c4fdbf3d1efb38b02c46937c38342e629c54517 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_252/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadc6c51e8463ff84f05b9db436a254bdafb9fa6ba61cdf0318714d1dba3a300 +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_252/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_252/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..78b3698677a66732574e6c93e52deb96ebbaa2a6 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_252/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6568898418e506fc31725de6d4ccc9f82ad396ea30dcda0a868ed66a38fb01 +size 313330 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_255/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_255/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..494767408919cba9871fcbf7e74edce543840033 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_255/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35676d0809c85a442368be0f4afb48c0ccda80d3526fc1d9ea01a98c82b36a1 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_255/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_255/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1d7f7426cf6cf0ed156ab7b247fbd10833f0017 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_255/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73464e81cdecd83775c474a48c4e42d7e1923b5f35f31fc0707141de98fcc58d +size 313390 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_258/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_258/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c161bacc12a414fe1efef50d1bff175573dff0a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_258/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03cfd1b236d05442b0d81b0ee9d32b24a20a7343bdb7d1027db7fa8dfad70c50 +size 3194 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_258/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_258/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09e15430a2210d059271daab55779f31aeaa7239 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_258/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004e8257803169a0245989d340b2d12fda5a9ffd325329dd2ab3b694512e3a06 +size 313277 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_261/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_261/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6da61501cffd68c427f9274d28d659c33bb0f5a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_261/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37ece8d1dc935963b6fa12cbf3e27e740731aeafda7b654490a895cdd1189565 +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_261/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_261/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7cc5e1d29f0552e77f97ef28e8b9c372628b2a9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_261/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74c4ba5e205c4cf0ecf8a348492ca3bc88ffc4a61aebb08dd9548aa0a506e1de +size 313462 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_264/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_264/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa6fbc0a1d8377145d1b88601a881c319351f23f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_264/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be64a609c975e2a82d35ec62aeddac6c39a9ed67345ab252cf12368a58725f46 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_264/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_264/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..66eb7cc7ac3e0bbddbae5c99617748980197c203 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_264/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89fb455de8865bb659c536d70b0c8157e300a47094d819787483725f995c9f06 +size 313395 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_267/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_267/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a59c192e11af89591d440d1e39488fce1898546 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_267/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeeb45f845ff745f05418942f0c1d56cac6844c66bf3259293335e6ad2d671b8 +size 3218 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_267/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_267/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f9f5e432d23f39e40c2c15ba5dc0d42dbbf23fe --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_267/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d7e71dc2443c736156418b346f98cbc8d827bdc0480669b6f386b98786b2fe2 +size 313193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_270/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_270/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d196bcf51c13d96e68b9c227429d66ac61a1b3a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_270/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00349dd6cadb82c82cca08e52b52d051105855dcf7a3b367c69f9292219ad85 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_270/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_270/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..48ed1c5d30d09171feb6eef4abe11cfff3cd051f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_270/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f87cf5f8405e96ae31a50c295644dc4214c119adfc069e2ad40cf48b655c69 +size 313326 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_273/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_273/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4b514097f26ee3e3d04c7797d8d368530a3bb21 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_273/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286ccb02153d4adcf1fd443d368bbfb08550ba0ed9c22eb6037ba84fc4770d80 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_273/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_273/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1263ba1c0686f41ac846234c85b46ed2f822bd0f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_273/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c91c6feec07ac02e8bd45a5b7c8e856adf533b7a1d18a03d372e1a6193c3e10 +size 313324 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_276/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_276/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1c9ad079c611e4ea44d83f72d75c431f1b51102 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_276/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8cf310b4185a0e3bede146bda9021c72c4d25e466c9b63d0966f6b4ba1fed59 +size 3223 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_276/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_276/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9dd754cca3a2b425dd92d8cf6597ba93fa85a39 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_276/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29873194a0dd6dbbb9a454c20f84870e855d6d1bda5598b8d949d03e552517a +size 313485 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_279/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_279/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67aecac479c9cb3585aba7c11f9ec4d66b55c0eb --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_279/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac773bd4b2052ffb868403d79318709121026612e315f8fe6349f647b8354ec5 +size 3186 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_279/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_279/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fd2eb7633c570f1dbf082c59d71eaef247abd58a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_279/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c2da8d210881f1f7e515ce31b4af724b550b36f4f02be41cefa359c3fafd603 +size 313435 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_282/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_282/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1839712c42ac3679f906c96bc61cac1b74e18e26 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_282/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b854d280cb1d3eb24931b7523580d9db73e012032bf7801286e1265a94d22e81 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_282/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_282/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b7cf91147719d8aae9e91600d6605b72e2651b0f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_282/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6862eebf2e02e0532ccb0edbc90c240a1af76cf3f21edd15ea416bcef03ffb51 +size 313396 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_285/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_285/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb00734be72d1af032e20ef9859a5bd9514e7d11 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_285/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2ee6f3e57f9dd5b448369140aad8117edf793e77c48cb0579105bbf8f76ee4d +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_285/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_285/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8cf39581f1770d59d7865cef2d800872985e8ed9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_285/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2854b14aca52775a5f1a3b286870ae01fe01c57cdb12ee9b8baf53fb2ffc04ec +size 313409 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_288/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_288/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f671c099509d082e95f70385c8778a32365c3072 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_288/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41cc4c7e296469f3eecd8c39ac96ae95bf125b742fa13da1c7e30d40a155ba5 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_288/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_288/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..53a2b24cba35c3b38c0d5830047fb41020d6a7eb --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_288/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d07da49f24598b3c0a81600a45eff714d14e6712b85e306a1e05d55dd3cee2 +size 313414 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_291/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_291/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8312d5d0cb94704c43096b2ea378c2ccb5883634 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_291/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c489188fb54b62b2305a16f047bdbfcb847c02cb05c26d1c191e1e51fe079a +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_291/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_291/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..91a5eb455abafb9e258ed10946fc6176ea234ff3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_291/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e86c25deb16b1d46591966af6ef1d6b2d3a855a2d514763b0b4df72ddca248 +size 313409 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_294/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_294/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cba32d051519c4e9de7b76e357d98c6b57dd7cf3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_294/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f415c9863b6ea7a2e87723620b10656d3f73f476e563e853d37d19caa4828d2d +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_294/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_294/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f22caaf9236c8590a81b218576aa7ec638c4d780 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_294/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32064862d5aa24acbd5b4256415b789330c0b67fe88ef584ca3faba6d236ae9e +size 313379 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_297/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_297/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a98b80cadea56fbfb6b22589067d26800fa9bdb9 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_297/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a399f5b68334892d64ebd00cb52254c96f3997fb5c5d94a2430661f265ba160 +size 3189 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_297/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_297/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..296c969b04e518f167ba48373e38db39866b0ce7 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_297/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4762e50dc2516cebac7305c82d82547be13308d9598a7809229184a6f0b70883 +size 313285 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_300/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_300/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..16ee04d09940f44f26302445d8165745d1fda69b --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_300/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63042dd21cc3a7e03dcf3b6104f632f6587f92c77ec51c4743438bdd285b114 +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_300/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_300/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..93ec7808844c33960ef25ec98e31a56c10dc414b --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_300/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65e26eec26a77b5d0ba6f48a8660c1da8154e1b8464b992d5e7cab32257659be +size 313249 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_303/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_303/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f5305072767b6e3157a2725f5df760ea74f3443 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_303/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502427d919a7852ca4251a5a4086fd2a9eeb7e062582408610e8444c654da90a +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_303/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_303/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c891c9a0ef2fea668d7f27f0b52296ff9cb0dc67 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_303/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27865349619c6cd223deb7ec8d442e4436c4ddbf961a4339ed1f6bfa17b13b80 +size 313356 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_306/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_306/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fc0f1e1ab464b1d96f431c2fde42e34639f10f8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_306/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dbcf4682a5d2af877130383b24f21dded8102e8e700c8a8b89291ba0da2849c +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_306/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_306/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e5865d9ac18e7ecde75fdfd46eee9f1f3698f4e3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_306/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d49f8aa5cdbd81740b524b943bead52552cdf400d106a6b1e703aff7cdf43c +size 313290 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_309/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_309/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d78a3abfb842e4ef0db8ca13743248f694e386c0 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_309/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b30b96d9ea9310e5dfefd0d695fdf3d4c54db28cc9da6392cc34e082c2017df +size 3194 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_309/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_309/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b30ab842efd7045f4413c56880a1029fd5fb0c18 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_309/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95caaae9eafb5ccfd9b4d492cc2f1e026ce6cf99331699273e9131eaa47572c4 +size 313218 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_312/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_312/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..247f7138fa73ebc033d6dd773959843717c6c435 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_312/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdce95dc2e335d28cdf4a289892a649460d05488947460066c7f505466948f19 +size 3195 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_312/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_312/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6a226d184968817e0837bc3691486e6e97f05ce0 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_312/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45fe4c50ecd50ace8c614e6d31e1c402e31fe4bbfc002828f416ddb8f9cec715 +size 313323 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_315/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_315/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..787e24a8de7430570b2e5bbf82701744d90342b8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_315/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9606da1f2c64fc24511ff757fda5fd0ca79689ef20003f6403956d86331ce211 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_315/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_315/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0c010f9a62aef59b9c737fe7f4df9e2b86b9378b --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_315/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc88a9374bb69511bc4b4edb9d538e481bf22ea3b78d1e5a449819fcba802f52 +size 313125 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_318/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_318/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a2ae3c9b772d6a526b347160a08c15fd537f4e3 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_318/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16343c5e53161cc27aa9a8385110ea16fb2096104c0610525d5a2f0e50fe22b7 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_318/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_318/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0513e9ab38f2ebed737d798980aeed51206b7d10 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_318/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29aef13d17cf35dd83b013a46f8a61a50210ea18f4c47f08c081c9463d713907 +size 313244 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_321/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_321/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3668eff625cc0f310be2689b2f2667076ec57c6a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_321/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f96d5453616a95c492188a2efe7ab862889ca470f77ee5a7f888d8bd0ddd9820 +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_321/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_321/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..358db7cd18076dd501510f5c21fc01b7e8572b3d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_321/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b15781a1ac7f99333400d824f4ca9f030ddd35744fbffc5c8d9a198ae671a4 +size 313242 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_324/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_324/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b286156a0e998b5d5471da1f5d874d220aaf932 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_324/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84bae7f3f9cb6137644b203095dc2550d456b350ffed68c0c6a500ad3dde731a +size 3195 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_324/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_324/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f54c2d98324201b48fdf24fbbc76c978e5c3e5e4 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_324/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf058a0d087c89d649dd36624fa62faf498ab66c661435bbe22dda447ad2ec35 +size 313281 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_327/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_327/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3887318d87dbfb09fbd8b334bb9a42c7d3161c8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_327/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd48a8387b385e8d4415ab3e226b01661f530c2a94d6e3fff89e6120bee6d7c0 +size 3188 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_327/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_327/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..32c0b55203ef645c3d6b36dd0adba6f451d297d2 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_327/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:821916a307b347ca9bcc7bed0f79183bb4c2c84af46105a7c65ef24f314ab580 +size 313402 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_330/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_330/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..36caf2509098466803bb27ae47752501d3e475dd --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_330/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6be23b6d264a03f16f54b1f1554b3c4800f466e9487dc4845eb80f7be236d00 +size 3190 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_330/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_330/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..26e14375b51c68a56e0d31c2b692c27ba5c47d7f --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_330/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab5254adb7c3b7498cd9d78985b62a96a0fbf24e16752cfd9436171b2f8e8ad +size 313360 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_333/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_333/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d7fa5343229121d6d200d79601956038f67bb43 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_333/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b717425d3c47570e3bce7aedf677c04238b0e3aefc75006f652c38222b66273 +size 3191 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_333/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_333/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..54539314165707f6b732acc3d7d6c321039dfe8e --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_333/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ccc08005f004a722b8448680d451d7fe6f4b55568ec38cb932f58eb7d15f0f +size 313492 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_336/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_336/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..945f7930f857c34c520ea75dfd131faea37968de --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_336/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3acd9bdfa80eb2c1256cacb575109d114fb94af4985116c5ab9be72aa08adb52 +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_336/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_336/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..04d4fa40538d5308693cb96200e5312cceff00b7 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_336/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2634a8824106e74d8ff31283cbee077b440b455f4da6fb604321d0aa7cc8e20d +size 313378 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_339/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_339/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c8c0b5ac003b8ce6ee41315784dc94686604f8d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_339/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cdd86373304f82d884e78d228a7211b5d143142adcab65b53b06b2589f15e5 +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_339/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_339/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f528249dc76b4e65f8a71b6ba015bd500c7d1b8d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_339/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5707b30e9b1c1d5b01eba68baec69c0f871bad03c1e4b3011612a4bf408c1e6d +size 313454 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_342/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_342/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..77984a26d25655eb28f60b0bd709c20ad298723d --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_342/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71c0095dbf43f14f39ae4e85beeb3678d270e89f5e0f780dd383fe74fac60666 +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_342/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_342/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2665198d9b9a224bf4da3ddc3d310a9e7ec22825 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_342/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08d1f8ede5f79a07c1ca02b89ced87bd6cefa113455c543e1e72be6ee341db1 +size 313425 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_345/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_345/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa84bb7e26e4875cab782c843767bb078c0a071c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_345/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81511a4d4c23f08acde994f7c951239b0f297b4640038b3dedb929b03e1da0cf +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_345/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_345/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e39228c8ce13e82c6a3abbfdd4b4fc9e5cb4c793 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_345/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:166d02d81e0c8c42e6820ee33486561443cbde23cacdfcf6521526a6ea337549 +size 313222 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_348/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_348/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..508dd42741d2cd6d4ef320287647e1ff759da63c --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_348/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:800a5069e879eeffda2051798dceba240910d870d51db1e30ed5108d1bbe7e5b +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_348/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_348/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f0a0d5a89a9c6b6c5bc82d8c5b8ebe7107369aa --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_348/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3c5cf987e23ed7dbf4fc2e5fd53957096dd97993d7d660b4cc24f033ca150c +size 313345 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_351/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_351/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c009608221afc425b591ea8024f2e9805ebcf1e --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_351/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a35cc69fc1d0a22041d6d578fefbdab9989b56222bcc7d2f00818435fecd95d +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_351/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_351/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfb310df92c806d8c1edf9d80a3f4c1e6841a577 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_351/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8ad8941033229a987dfcc94505b2f4144a8d3395cece052a859743f76f3cab +size 313456 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_354/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_354/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9f5ad67a908b8d34afd2fd2dacbdd7daf5bace8 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_354/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1248469d6b7c75d88512bf68a265a7cebd11ba38d6acfb4e53be395db52c6b1e +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_354/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_354/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c98dfff7e23b38ada58d76caed442048f7ac6a61 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_354/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6fdd2d12de7634b766050afd761f09be160da1dccb5ab2c06e9dcb314035daa +size 313443 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_357/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_357/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a02aa650e00a95dd87e16d78d2b954e578d246f2 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_357/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9177938e006ea6bc7fcfe523774b873473d3f07ea3eaf60c96796194bda3a5a +size 3193 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_357/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_357/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0241351e5b430185e9da53ccda07b4065edb7359 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_357/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed82af4a2674c2985e84525d5fbc18647417420c447807f2a0a98630311e132 +size 313308 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_360/results.json.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_360/results.json.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f3dc7e30e13fea95543165cf7d416343045d8e5a --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_360/results.json.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:152a4030d4e042d55bb32e0825669d70ee07768b5cf9d566fcd61479a07310eb +size 3192 diff --git a/eval-results/truthfulqa_mc2/0/ckpt_360/truthfulqa_mc2.jsonl.tar.gz b/eval-results/truthfulqa_mc2/0/ckpt_360/truthfulqa_mc2.jsonl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..444b5044063d7fc44f84a386123034f0c9287627 --- /dev/null +++ b/eval-results/truthfulqa_mc2/0/ckpt_360/truthfulqa_mc2.jsonl.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e13ada2815a26baaa7f3e36258881eb8a9f4996577811b31f378a4e798d392 +size 313421