Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/anli_r3-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_2dm-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_adjunct_island-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_anaphor_number_agreement-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_anaphor_number_agreement-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_causative-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_1-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_1-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_existential_there_object_raising-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_intransitive-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_irregular_past_participle_adjectives-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_left_branch_island_simple_question-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/copa-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_autre-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_disability-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_gender-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_physical_appearance-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_religion-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/gsm8k-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-business_ethics-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_physics-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_physics-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-international_law-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-marketing-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-miscellaneous-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-nutrition-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_accounting-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_law-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-security_studies-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-us_foreign_policy-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-world_religions-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/iwslt17-ar-en-v0-greedy_until +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_mt_es-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_openai_mt_es-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_standard-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/logiqa-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_algebra-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v0-greedy_until +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v0-greedy_until +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_num_theory-v1-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_prealgebra-v0-greedy_until +1 -0
scripts/yans/eval/lm-evaluation-harness/tests/testdata/anli_r3-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"anli_r3": {"acc": 0.31916666666666665, "acc_stderr": 0.01346230971200514}}, "versions": {"anli_r3": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_2dm-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"arithmetic_2dm": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2dm": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
c421f9cd5a5001b80e528441da925128177a04db8526ebcdab543a90b33c9ce2
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_adjunct_island-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
976a5cac4bdb724632eebd4cb9e522203ce3da8d5525288a597c86e80469f3f2
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_anaphor_number_agreement-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0bdad31c974ba064e1f1ba931841ec2ba7461e8b0ca54ea5f79f08b6bae0bab5
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_anaphor_number_agreement-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_anaphor_number_agreement": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_anaphor_number_agreement": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_causative-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3d67ad025185dbb0808ebd7f508edcb5750c18fc3c01ad91f20fda80780c916c
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_1-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
2df8cc7f17089f7e8c7d974dcb324c809d30ef059a5be22aed6b69f44230809f
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_1-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_determiner_noun_agreement_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_1": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
95acb74fac7d57ae2c9d208361a5f8ad36b0b19a055f02e648ed8e99505f4b43
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ad61c619aa79433d02f1aeacde2ab87291fd5d5c370032c24d41c4f0065ed1f9
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
007c47e5fbf88119c5180feef75e1345d448e56adcd4c7ab2d52fb8d67350d34
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_existential_there_object_raising-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
63567712076256f373131971676c1c6d711efef73cd0e4de3cc639bc631a2413
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_intransitive-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_intransitive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_intransitive": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_irregular_past_participle_adjectives-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_irregular_past_participle_adjectives": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_past_participle_adjectives": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_left_branch_island_simple_question-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_left_branch_island_simple_question": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_left_branch_island_simple_question": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_passive_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_passive_2": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
f69d9891f59872538962221fccc425b07df7cfbd83cdc546ce83e6b0e9a93f7c
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_wh_vs_that_no_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_no_gap": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/copa-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"copa": {"acc": 0.48, "acc_stderr": 0.050211673156867795}}, "versions": {"copa": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_french": {"likelihood_difference": 0.3367363060632734, "likelihood_difference_stderr": 0.005827747024053628, "pct_stereotype": 0.5062611806797853, "pct_stereotype_stderr": 0.012212341600228745}}, "versions": {"crows_pairs_french": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_autre-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
f145ad5086da0bf8c76f0730258529fa243efe32b7ab792d3c4716284b4b5495
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_disability-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
fa1e5fc7492a66c9a90765e605003c38408347617db5ecf36706f1d374af5d42
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_gender-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_french_gender": {"likelihood_difference": 0.3364019171359413, "likelihood_difference_stderr": 0.012815700745990895, "pct_stereotype": 0.4766355140186916, "pct_stereotype_stderr": 0.027920316348204986}}, "versions": {"crows_pairs_french_gender": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_physical_appearance-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_french_physical_appearance": {"likelihood_difference": 0.3221673223187262, "likelihood_difference_stderr": 0.026978346460100555, "pct_stereotype": 0.4027777777777778, "pct_stereotype_stderr": 0.05820650942569533}}, "versions": {"crows_pairs_french_physical_appearance": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_religion-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_french_religion": {"likelihood_difference": 0.32691651640972225, "likelihood_difference_stderr": 0.021833493193249474, "pct_stereotype": 0.45217391304347826, "pct_stereotype_stderr": 0.046614569799583463}}, "versions": {"crows_pairs_french_religion": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"ethics_deontology": {"acc": 0.503615127919911, "acc_stderr": 0.008338908432085105, "em": 0.07119021134593993}}, "versions": {"ethics_deontology": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/gsm8k-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"gsm8k": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"gsm8k": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-business_ethics-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-business_ethics": {"acc": 0.29, "acc_norm": 0.27, "acc_norm_stderr": 0.044619604333847394, "acc_stderr": 0.045604802157206845}}, "versions": {"hendrycksTest-business_ethics": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_physics-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
704a7671ef981fb95594782bc446dd632e87ebdbe89436a0603b714fb5786c75
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_physics-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-high_school_physics": {"acc": 0.2582781456953642, "acc_norm": 0.271523178807947, "acc_norm_stderr": 0.03631329803969653, "acc_stderr": 0.035737053147634576}}, "versions": {"hendrycksTest-high_school_physics": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-international_law-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ea9b2cefd27959db564168f6ad1169a5eaa012fc5a5d5b8faf9e34d94e335dc1
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-marketing-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
b4fa0681fe54671a80509779d4338d744097a7206687f62977df7145dfa74a66
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-miscellaneous-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
972dd88dbbaf09d14766e243cfc233425e7c01a26dbc61bdb9eeefa788822331
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-nutrition-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-nutrition": {"acc": 0.24509803921568626, "acc_norm": 0.28104575163398693, "acc_norm_stderr": 0.025738854797818723, "acc_stderr": 0.02463004897982476}}, "versions": {"hendrycksTest-nutrition": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_accounting-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-professional_accounting": {"acc": 0.2553191489361702, "acc_norm": 0.26595744680851063, "acc_norm_stderr": 0.026358065698880582, "acc_stderr": 0.026011992930902006}}, "versions": {"hendrycksTest-professional_accounting": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_law-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
c38c9d5d84eeb7a5f3c4a34d6e70d7e15847b3c38f26e4b119c982bb935e118f
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-security_studies-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
92dfffe2acf3278256486d3e1cf1edb5a739ad0a54c0f9c67695f7a411ed5f76
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-us_foreign_policy-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-us_foreign_policy": {"acc": 0.2, "acc_norm": 0.24, "acc_norm_stderr": 0.04292346959909283, "acc_stderr": 0.040201512610368445}}, "versions": {"hendrycksTest-us_foreign_policy": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-world_religions-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-world_religions": {"acc": 0.21637426900584794, "acc_norm": 0.22807017543859648, "acc_norm_stderr": 0.03218093795602357, "acc_stderr": 0.03158149539338734}}, "versions": {"hendrycksTest-world_religions": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/iwslt17-ar-en-v0-greedy_until
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
e94d310de91fad7ce36f4cf3305552020221482c5588f2efcefaa019893504f1
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_mt_es-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
4a88f4b316c72fe0396c382d6cbb33568ac4d0ad225150d3536635c085359fc9
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_openai_mt_es-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"lambada_openai_mt_es": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_openai_mt_es": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_standard-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
8958d9f8d8145046b692fadd8a9cc9c8bad5617c10774280cf7c24c21d2be160
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/logiqa-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"logiqa": {"acc": 0.25806451612903225, "acc_norm": 0.2764976958525346, "acc_norm_stderr": 0.017543209075825194, "acc_stderr": 0.017162894755127077}}, "versions": {"logiqa": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_algebra-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"math_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_algebra": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v0-greedy_until
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
2aa9ae43ee9dbb2457525247d7b65358632c5eaa9cbfc40cf95a4f17f5d942ad
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v0-greedy_until
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
d53c699de272d517ed7ad783b4e692302be9f9f97a8d4ac7a6541e538a7cabe0
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_num_theory-v1-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"math_num_theory": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_num_theory": 1}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_prealgebra-v0-greedy_until
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
752cdf343d7152e476b0273065024f6ea0e0f47ea385c6bdf9067736cb39724a
|