diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/anagrams2-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/anagrams2-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..9db9d158dc07c46ddb5bc88ea797cc41080ca941 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/anagrams2-v0-greedy_until @@ -0,0 +1 @@ +6700a3c44e48abe8337238dcbe3b54cf4abafe0c204c52d921e590872fbd05e7 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/anli_r1-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/anli_r1-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..4450c0628e9d9a6f8ff90c9efa0c5e5b1b7e4069 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/anli_r1-v0-loglikelihood @@ -0,0 +1 @@ +3a84baf2f170e138c6ce0bc9f06f905def35d705fa2b8781f10c87aef404c4cb \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/anli_r2-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/anli_r2-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..4a437fc8a8bb7928fade05baac9319b74d939bf8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/anli_r2-v0-loglikelihood @@ -0,0 +1 @@ +d0ea3c3e09d533982c15b4c034439896d6af4bbafb2254d305e20215534a251d \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/arc_challenge-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/arc_challenge-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..91a3560635db37739cd7504bdc84c6c840192462 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/arc_challenge-v0-loglikelihood @@ -0,0 +1 @@ +41c34c96cca8ace661911d0033d630c554b283f5a3953bcdc50720ae6b00a9c1 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_2da-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_2da-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..874256a0b8ae0c6fe4874498ecb9e73f383f0d60 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_2da-v0-res.json @@ -0,0 +1 @@ +{"results": {"arithmetic_2da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2da": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..a18e6eec6e5fc11e6a613618dddd770e96d8fdd8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-res.json @@ -0,0 +1 @@ +{"results": {"arithmetic_2ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2ds": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_5da-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_5da-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..fb9a5671e8a4269fce5c477cbc3c795801e75fe1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_5da-v0-res.json @@ -0,0 +1 @@ +{"results": {"arithmetic_5da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_5da": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_anaphor_gender_agreement-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_anaphor_gender_agreement-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..1c39ab70454e6589ea0c506e3e98bbd5a21449bf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_anaphor_gender_agreement-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_anaphor_gender_agreement": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_anaphor_gender_agreement": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_animate_subject_passive-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_animate_subject_passive-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..96a7ed5e2a8715027b1bf853cc1836b4f587a2e5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_animate_subject_passive-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_animate_subject_passive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_animate_subject_passive": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..f1edb69cb10b150f68b62dad3a18b5248bba95d1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood @@ -0,0 +1 @@ +23ddafdff7b1ebe331b146e23b2c21aa109fe57aa1ce8ca201a0d239fcbdd166 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..80f2c6a7a02aec124cc43a4e0b0b48110ea60d02 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_coordinate_structure_constraint_object_extraction": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_coordinate_structure_constraint_object_extraction": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..a030be1d72c6a2d1794464b4c9b0cf2e48454197 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood @@ -0,0 +1 @@ +47c56f336df11924d8b97feb46339ce55bea4b216b6fd13946cc999ea36a4a95 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..94d73d41da2f66060d05319caa8641493c7f8fc9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_irregular_past_participle_verbs": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_past_participle_verbs": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_npi_present_1-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_npi_present_1-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..8e4ae8d6efba191c09ebc369b93437a441f188cb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_npi_present_1-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_npi_present_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_npi_present_1": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_only_npi_scope-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_only_npi_scope-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..f1846d3e936ffc75f39f0776024014444a2879bb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_only_npi_scope-v0-loglikelihood @@ -0,0 +1 @@ +fc0be817478c212327050fa297ef61ad214f4847dbff61d4e0fe7914c06a1691 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..8e254de7a73880a1880c1632e88d91fb4a9affdc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood @@ -0,0 +1 @@ +e6666c5657215ff4bfd646b8ee3ae6df956e71c0be9ab1c287fb1b68291dd0d1 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..fcaf915f36cfa6a15cb5cf52f786ad96adb8eecb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_sentential_negation_npi_scope": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_sentential_negation_npi_scope": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_object_gap-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_object_gap-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..60228b79185dd96e5e71a1c2f85ade32348d9f10 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_object_gap-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_wh_questions_object_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_questions_object_gap": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..1a88f8fa87a86cbff20d0def9955059e9cd73861 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood @@ -0,0 +1 @@ +d5486ffcc075cad4302e37ece9bbf5b2063c0b5a48e76c8e1dd365e22a5a48fc \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..34b959139635a241b0fe814ce2ae7240c32a7c1c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood @@ -0,0 +1 @@ +eed67491bdf493a1dad8f1d9766bc7bd0e79946365b833c0f7eb81ac998e3dca \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/boolq-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/boolq-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..291b9f122d0219c93c941daeb9ae362c439bb4e0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/boolq-v1-res.json @@ -0,0 +1 @@ +{"results": {"boolq": {"acc": 0.5048929663608562, "acc_stderr": 0.00874463623355505}}, "versions": {"boolq": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/coqa-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/coqa-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..9ca8024e3ba0fb80420952bceaf01d85c42b0fcc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/coqa-v0-res.json @@ -0,0 +1 @@ +{"results": {"coqa": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"coqa": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_gender-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_gender-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..c24fb9dd6dfb9c494474fc08011d6d86ef18f5ef --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_gender-v0-res.json @@ -0,0 +1 @@ +{"results": {"crows_pairs_english_gender": {"likelihood_difference": 0.3361377482385407, "likelihood_difference_stderr": 0.012853081126751691, "pct_stereotype": 0.478125, "pct_stereotype_stderr": 0.027967820983765136}}, "versions": {"crows_pairs_english_gender": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/drop-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/drop-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..8f397b410df7a77e25fd2916787b6050a5806d1d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/drop-v1-res.json @@ -0,0 +1 @@ +{"results": {"drop": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"drop": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_cm-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_cm-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..f81a700903262aec7eae2b4c39260f3f2c8f1dd0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_cm-v0-res.json @@ -0,0 +1 @@ +{"results": {"ethics_cm": {"acc": 0.49987129987129986, "acc_stderr": 0.008022881531793336}}, "versions": {"ethics_cm": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..ab01349737c063432656f3951ae913a63a85adba --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-loglikelihood @@ -0,0 +1 @@ +74ecebe322457d70afc16fde848978410a09b854dc65c47f428d100bd1593248 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_justice-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_justice-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..cc18a7e67b6f38aaf759bb9073314da42b86f992 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_justice-v0-loglikelihood @@ -0,0 +1 @@ +d7dfc44fea507b5c5c3a8218f79ed8197da8599ebb396d85feb91c25512126b6 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-abstract_algebra-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-abstract_algebra-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..d0d0fe872b24d1304c932ffde5546a70b125e100 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-abstract_algebra-v0-loglikelihood @@ -0,0 +1 @@ +e35d1eeb356ac1084d4e9773f028cb3c81ba1c6e5574d598ac4a78aa467cd797 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-conceptual_physics-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-conceptual_physics-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..1388bcdcd9a7283decbf3680283a2cc4cfc7cfde --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-conceptual_physics-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-conceptual_physics": {"acc": 0.2680851063829787, "acc_norm": 0.2553191489361702, "acc_norm_stderr": 0.028504856470514185, "acc_stderr": 0.028957342788342347}}, "versions": {"hendrycksTest-conceptual_physics": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-human_sexuality-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-human_sexuality-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..091d7352ce1b260f6acbd1338b7d54c5716d23ce --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-human_sexuality-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-human_sexuality": {"acc": 0.22137404580152673, "acc_norm": 0.22900763358778625, "acc_norm_stderr": 0.036853466317118506, "acc_stderr": 0.0364129708131373}}, "versions": {"hendrycksTest-human_sexuality": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-jurisprudence-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-jurisprudence-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..4ef181974956e3f899121ac46dc5e192231d1a65 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-jurisprudence-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-jurisprudence": {"acc": 0.25, "acc_norm": 0.3148148148148148, "acc_norm_stderr": 0.04489931073591312, "acc_stderr": 0.04186091791394607}}, "versions": {"hendrycksTest-jurisprudence": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-logical_fallacies-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-logical_fallacies-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..a5807b5831206a8cb29b2e99c02c7c6025dd6f25 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-logical_fallacies-v0-loglikelihood @@ -0,0 +1 @@ +2e9449dd803f9e2334dc562d9f04031fd013ed36b883b44ab500533a5dbbface \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-prehistory-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-prehistory-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..e0163dd555e6c510aa24c5da5ad187ef52ed7c4d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-prehistory-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-prehistory": {"acc": 0.2623456790123457, "acc_norm": 0.26851851851851855, "acc_norm_stderr": 0.024659685185967277, "acc_stderr": 0.02447722285613511}}, "versions": {"hendrycksTest-prehistory": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_accounting-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_accounting-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..fe5997427ef5df8be6d52709189b7baa8a410df9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_accounting-v0-loglikelihood @@ -0,0 +1 @@ +847418f7b22cd9b499e95fd73c40a2fbc40076895280cc2c560199c0c4c4f433 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-us_foreign_policy-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-us_foreign_policy-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..eed85dbaf98e67cc5cd0876bc5f73f3f9fb186fb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-us_foreign_policy-v0-loglikelihood @@ -0,0 +1 @@ +a1a338d0083a21054f74d36a296d6bd8e2e457327c0fd630bebcc61ed758044d \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_mt_en-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_mt_en-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..efd450a8f2a4ca067f7380af809fdda48d1ee465 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_mt_en-v0-loglikelihood @@ -0,0 +1 @@ +6829e6a8aa5922e6c92dd31403cc060f242dc0ede4a775e085a70da095ab2e20 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..ab6217943ab5d8e7547655c90ec95553c9557ee8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-res.json @@ -0,0 +1 @@ +{"results": {"lambada_openai_mt_fr": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_openai_mt_fr": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/math_geometry-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/math_geometry-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..1c7362fe44e4432f56f18932b4b429d5cf573399 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/math_geometry-v0-greedy_until @@ -0,0 +1 @@ +46bc4cb219b6903397da782699a684bdbb982c0c954ff82e6beeed5c84878f42 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..7a195d9ac43e6feb4a7fc354f5dc424a27b0bf7d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v0-res.json @@ -0,0 +1 @@ +{"results": {"math_intermediate_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_intermediate_algebra": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/math_precalc-v1-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/math_precalc-v1-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..71bbd8d9c221ca484d517bda46c109b2610f79f6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/math_precalc-v1-greedy_until @@ -0,0 +1 @@ +bc834b06fd79473ca6fe38a51b714aad0bf0478c1b0eec787eca34dbdf69cb71 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/mc_taco-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/mc_taco-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..fc36d1ed3ff4d02330a13eb7431d5413b4c484e5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/mc_taco-v0-res.json @@ -0,0 +1 @@ +{"results": {"mc_taco": {"em": 0.07732732732732733, "f1": 0.41600515965511614}}, "versions": {"mc_taco": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_arxiv-v0-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_arxiv-v0-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..3aa1d8c7349449271fbd81fbbc06fde47a116028 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_arxiv-v0-loglikelihood_rolling @@ -0,0 +1 @@ +814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_arxiv-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_arxiv-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..3aa1d8c7349449271fbd81fbbc06fde47a116028 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_arxiv-v1-loglikelihood_rolling @@ -0,0 +1 @@ +814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..b37a91cc2dea829e8dab7bb0fe934442c54b3a26 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling @@ -0,0 +1 @@ +5c17ddfebeab8c41dabadb6fc216ceda91e3fe5dc95aaf1b2c843d7f11828b03 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..192e9066a42acf28436ae325a212b2a7c2ebf517 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v1-res.json @@ -0,0 +1 @@ +{"results": {"pile_dm-mathematics": {"bits_per_byte": 8.910951449933553e-05, "byte_perplexity": 1.0000617679162955, "word_perplexity": 1.0002875035042451}}, "versions": {"pile_dm-mathematics": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..4c53edd2ce620475c056ccca5cde73380c246074 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v0-res.json @@ -0,0 +1 @@ +{"results": {"pile_europarl": {"bits_per_byte": 8.648858203555344e-06, "byte_perplexity": 1.000008648895605, "word_perplexity": 1.000063506523818}}, "versions": {"pile_europarl": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..7b5771f4911f3069217d75d12cbdfa1a579b6663 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-loglikelihood_rolling @@ -0,0 +1 @@ +d77f3f68aadd6cbf1290c2f6737b2ed5d5c2a60e4c81a65c280f207783caabe1 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..dd0e0bac36b116bddbcd70d4327c3cdb3e3630e9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-res.json @@ -0,0 +1 @@ +{"results": {"pile_freelaw": {"bits_per_byte": 4.5623635481434923e-05, "byte_perplexity": 1.0000316243943415, "word_perplexity": 1.000203169094218}}, "versions": {"pile_freelaw": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_github-v0-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_github-v0-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..cf8251e4f68e2e893624142031e80d4d5777f4f2 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_github-v0-loglikelihood_rolling @@ -0,0 +1 @@ +df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v0-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v0-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..47805d3b5fe82555e4d61a90b43c157c974ddabc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v0-loglikelihood_rolling @@ -0,0 +1 @@ +0f1c23a1f4ddec0c2b1ff34de8d1505b0eb9e2868d8edbcc1b6de13d02f32036 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..47805d3b5fe82555e4d61a90b43c157c974ddabc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v1-loglikelihood_rolling @@ -0,0 +1 @@ +0f1c23a1f4ddec0c2b1ff34de8d1505b0eb9e2868d8edbcc1b6de13d02f32036 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_openwebtext2-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_openwebtext2-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..ead8d0b0bffe2da65e2602c7f2f352eeb404ef26 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_openwebtext2-v0-res.json @@ -0,0 +1 @@ +{"results": {"pile_openwebtext2": {"bits_per_byte": 0.00012809520662477846, "byte_perplexity": 1.000128103411166, "word_perplexity": 1.0007951516532847}}, "versions": {"pile_openwebtext2": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..d5369ed3c97838d67c2900cfac4aaeb5881ec884 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling @@ -0,0 +1 @@ +731fdef4a43949b179ba0c540148ebc2fa41583dd583ef580dd812076c66a451 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..383233f259507dfdbc6556834185f1eb6161f9cd --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-res.json @@ -0,0 +1 @@ +{"results": {"pile_pile-cc": {"bits_per_byte": 0.00011234131907228174, "byte_perplexity": 1.0001123476295946, "word_perplexity": 1.0006738958554477}}, "versions": {"pile_pile-cc": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pubmed-abstracts-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pubmed-abstracts-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..333c2970fa21b0bd53b77bdc3880acad0c8d6459 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_pubmed-abstracts-v0-res.json @@ -0,0 +1 @@ +{"results": {"pile_pubmed-abstracts": {"bits_per_byte": 0.00037553733051528816, "byte_perplexity": 1.0003756078534862, "word_perplexity": 1.0025884332779}}, "versions": {"pile_pubmed-abstracts": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_stackexchange-v0-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_stackexchange-v0-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..dcf0e64cf0d4bf4fe719b8d349c1d36484d2047f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_stackexchange-v0-loglikelihood_rolling @@ -0,0 +1 @@ +e524bfb3e21cbdaddc117403a50df598520c7bf5b2c60ad8f2372cfa564e79be \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_stackexchange-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_stackexchange-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..dcf0e64cf0d4bf4fe719b8d349c1d36484d2047f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_stackexchange-v1-loglikelihood_rolling @@ -0,0 +1 @@ +e524bfb3e21cbdaddc117403a50df598520c7bf5b2c60ad8f2372cfa564e79be \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_uspto-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_uspto-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..c13dfc73f5927415055cf393fb16bd13ba6b1b56 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_uspto-v0-res.json @@ -0,0 +1 @@ +{"results": {"pile_uspto": {"bits_per_byte": 0.00012062434384130924, "byte_perplexity": 1.00012063161925, "word_perplexity": 1.0007716198916954}}, "versions": {"pile_uspto": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_wikipedia-v0-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_wikipedia-v0-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..e44bd2762803a9b922febf4fe8bfd459e95174b9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_wikipedia-v0-loglikelihood_rolling @@ -0,0 +1 @@ +ef9ec0dd408316ca6537228a6812e839f14b30608973081d41efc47c138338da \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_wikipedia-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_wikipedia-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..e44bd2762803a9b922febf4fe8bfd459e95174b9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_wikipedia-v1-loglikelihood_rolling @@ -0,0 +1 @@ +ef9ec0dd408316ca6537228a6812e839f14b30608973081d41efc47c138338da \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_youtubesubtitles-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_youtubesubtitles-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..81c2e5ed06321b250a08a4232b3720ea5b650156 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_youtubesubtitles-v1-loglikelihood_rolling @@ -0,0 +1 @@ +68263c52adc0086011e2220b619983935cabb1cc1f5f9f8ee1a74ab2a7457967 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/piqa-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/piqa-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..bb6ebfb9a268d8fcfd3dd35ecb26fee05a1b8090 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/piqa-v0-res.json @@ -0,0 +1 @@ +{"results": {"piqa": {"acc": 0.514145810663765, "acc_norm": 0.5114254624591947, "acc_norm_stderr": 0.01166277802645167, "acc_stderr": 0.011661154475524836}}, "versions": {"piqa": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..91d8f3660413469e7ab00c3af1102392c3e26cc7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-res.json @@ -0,0 +1 @@ +{"results": {"qa4mre_2012": {"acc": 0.15625, "acc_norm": 0.16875, "acc_norm_stderr": 0.029702236908328808, "acc_stderr": 0.02879508360159146}}, "versions": {"qa4mre_2012": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/qnli-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/qnli-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..31c3097605f33c489d4f2552ce3060cd7a9155e3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/qnli-v0-res.json @@ -0,0 +1 @@ +{"results": {"qnli": {"acc": 0.5108914515833791, "acc_stderr": 0.00676380528502966}}, "versions": {"qnli": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..52050de16b54b432bdd68fae780660a035b10c0a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood @@ -0,0 +1 @@ +d2ebe3a63517d1d481aa1513bebe124c57a0904554a1e95f566979cfe67b1a7f \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..c8152027dc2f15319676bc32d55f32ca0bec00b5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood @@ -0,0 +1 @@ +be4fcbad876124c4ba3c71970538a97fec0e36a9cc677c70b6c9243a7bcee0ec \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl new file mode 100644 index 0000000000000000000000000000000000000000..622dc4ebf4fa281171d3cd9b5f6437401c8b8cfc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22cb4aaeb7f974ef2dc2ae63572f8936710e39ba48b4f08f5fd37b201dec6148 +size 1766 diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/webqs-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/webqs-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..4d604d438db6c4cc77a43aca8d2a7f605aef6b1c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/webqs-v0-loglikelihood @@ -0,0 +1 @@ +96b218173468cc94552a0b946193bda89faba51f1bfc3e7945531f9dff8d6fe9 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wikitext-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/wikitext-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..f09af45a38c0de097358c587420858c7a53a10aa --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wikitext-v1-loglikelihood_rolling @@ -0,0 +1 @@ +b6f83e6cf7535ee41b0057c3e2ec2cf7f2fa5a9119b305c479a83091d1142b2c \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wikitext-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wikitext-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..122098aec22e39599f1d3bffbb4bf619131d2335 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wikitext-v1-res.json @@ -0,0 +1 @@ +{"results": {"wikitext": {"bits_per_byte": 3.202519859941674e-05, "byte_perplexity": 1.0000221984224973, "word_perplexity": 1.000118710696617}}, "versions": {"wikitext": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-km-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-km-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..ddce46a79fdcb08c3eee1a534c11fc4dd796be53 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-km-v0-greedy_until @@ -0,0 +1 @@ +eb5365c46f22ffec9a157991627d6e1fd1117fccffaedfc73619e93bafb5a408 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-ps-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-ps-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..fcfb51f05320490d089cc8fbd4127a987bc868c0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-ps-v0-res.json @@ -0,0 +1 @@ +{"results": {"wmt20-en-ps": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 2.1193813610582323e-06, "chrf_stderr": 2.113911466119111e-06, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ps": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..db79b7f03fcfc8f7720f1344339e7d94d8a01ebf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v0-greedy_until @@ -0,0 +1 @@ +67f0333ddbcb07d7a9ac12919129a18fe4fea24e4826a11bbdde4fd5ed5ed83f \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..a6f148661093f5f97c85e96ab2c01da6e40c1547 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-greedy_until @@ -0,0 +1 @@ +fb4ec81bb89c70df7e21b43e0e882915b7b71a2a85bb8d4b59e0c7938baaa4c2 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..4f6dc98604bdeed7b87806094a6ffc3b0cbbfec4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-res.json @@ -0,0 +1 @@ +{"results": {"wmt20-km-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.015142474534585969, "chrf_stderr": 0.0001518735048829897, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-km-en": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-pl-en-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-pl-en-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..899ce01919910ab80fc95b99371bf334a7c7c37a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-pl-en-v0-greedy_until @@ -0,0 +1 @@ +89274499d84176b1ffe4eaec06f2c89ca807342384dc946c2e348d00116aaade \ No newline at end of file