Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/anagrams2-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/anli_r3-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_3ds-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_adjunct_island-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_drop_argument-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_existential_there_quantifiers_1-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_left_branch_island_echo_question-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_principle_A_domain_3-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_superlative_quantifiers_1-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_1-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_age-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_nationality-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_sexual_orientation-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_sexual_orientation-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_age-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_socioeconomic-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/cycle_letters-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/gpt3_test_8025023377febbd8c5f2b9f26705c394ff375d0cad7c89c10fd9b8e1eb66ff1c.pkl +3 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/headqa-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-abstract_algebra-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-anatomy-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-astronomy-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_chemistry-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-econometrics-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_biology-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_computer_science-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_european_history-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_microeconomics-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_world_history-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-nutrition-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-prehistory-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_law-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_medicine-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_medicine-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_mt_fr-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_openai_cloze-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_algebra-v0-greedy_until +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_geometry-v0-res.json +1 -0
scripts/yans/eval/lm-evaluation-harness/tests/testdata/anagrams2-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"anagrams2": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams2": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/anli_r3-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
6b6e5c6a794f2fbff78b7aa24fe0c90156039334bbd1cb34f7af9fc6e6183845
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
66f7ff3b40251ee38fadcbee658e309a200224356fc3efa07d0a490a2c24bfa3
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_3ds-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
d3d8bad8827d4530945a1d8b3c7589c0235bbed0bc89e7561a6fdac678f6ce5c
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"arithmetic_4da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_4da": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_adjunct_island-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_adjunct_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_adjunct_island": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ddb24ddfaebe076b3aa7107937d71bf5f4503a78283bc889e39200368603681e
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_drop_argument-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_drop_argument": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_drop_argument": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_existential_there_quantifiers_1-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
d77594382e6d9af31a8b8ef00ba1ef6c29d6be6d0ddb7a9c27ef25ace654e05a
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_left_branch_island_echo_question-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_left_branch_island_echo_question": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_left_branch_island_echo_question": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
fdb688ac6259bb65d234ef0a36e9a9ee449f9608f633b12e1943b462aead8e17
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_principle_A_domain_3-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_principle_A_domain_3": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_3": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_regular_plural_subject_verb_agreement_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_regular_plural_subject_verb_agreement_1": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_regular_plural_subject_verb_agreement_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_regular_plural_subject_verb_agreement_2": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_superlative_quantifiers_1-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_superlative_quantifiers_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_superlative_quantifiers_1": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_1-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_tough_vs_raising_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_tough_vs_raising_1": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
d0d47fe40a7ee558ba782edbc4f49f7d9123c8472a36decc97f8ab142b45b9d8
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
37483dfda688b62ad27161c9fc1e1e7710c5a6e6a7cd3474df119bcafd30e97f
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_wh_vs_that_with_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_with_gap": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ec3b1bbb9561e39c43c6f77a23b4060b15c606141c5346e3d0791b3e92aaa5d0
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_age-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
de74d2ac7f926f2f486c045d84aae8f71711102f9d77b31f758fd148810d13d3
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_nationality-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_english_nationality": {"likelihood_difference": 0.3383027778174895, "likelihood_difference_stderr": 0.015957585374543233, "pct_stereotype": 0.4675925925925926, "pct_stereotype_stderr": 0.03402801581358966}}, "versions": {"crows_pairs_english_nationality": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_sexual_orientation-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
e754a309296b157677dfba6e6feef983d1ce38dd0169ae726265621a7b573163
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_sexual_orientation-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_english_sexual_orientation": {"likelihood_difference": 0.31947594049467243, "likelihood_difference_stderr": 0.024404952720497735, "pct_stereotype": 0.43010752688172044, "pct_stereotype_stderr": 0.051616798980291805}}, "versions": {"crows_pairs_english_sexual_orientation": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_age-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_french_age": {"likelihood_difference": 0.31896094607685194, "likelihood_difference_stderr": 0.024068391933540753, "pct_stereotype": 0.4444444444444444, "pct_stereotype_stderr": 0.05267171812666418}}, "versions": {"crows_pairs_french_age": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_socioeconomic-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_french_socioeconomic": {"likelihood_difference": 0.3394681494647815, "likelihood_difference_stderr": 0.01702488895584347, "pct_stereotype": 0.4642857142857143, "pct_stereotype_stderr": 0.035714285714285705}}, "versions": {"crows_pairs_french_socioeconomic": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/cycle_letters-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"cycle_letters": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"cycle_letters": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/gpt3_test_8025023377febbd8c5f2b9f26705c394ff375d0cad7c89c10fd9b8e1eb66ff1c.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:590805560ee790d530c075ad76633eb2e9749440083e0bab63489ff920fdfd33
|
3 |
+
size 70917
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/headqa-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"headqa": {"acc": 0.23559445660102116, "acc_norm": 0.25018234865062, "acc_norm_stderr": 0.008272783230806014, "acc_stderr": 0.008105688874297972}}, "versions": {"headqa": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-abstract_algebra-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-abstract_algebra": {"acc": 0.32, "acc_norm": 0.34, "acc_norm_stderr": 0.04760952285695235, "acc_stderr": 0.04688261722621504}}, "versions": {"hendrycksTest-abstract_algebra": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-anatomy-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-anatomy": {"acc": 0.2222222222222222, "acc_norm": 0.23703703703703705, "acc_norm_stderr": 0.03673731683969506, "acc_stderr": 0.0359144408419697}}, "versions": {"hendrycksTest-anatomy": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-astronomy-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-astronomy": {"acc": 0.2565789473684211, "acc_norm": 0.29605263157894735, "acc_norm_stderr": 0.03715062154998904, "acc_stderr": 0.0355418036802569}}, "versions": {"hendrycksTest-astronomy": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_chemistry-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
044752b21540db95118b8cbe7e75c4c9b8758e27df56543deaeadec7f749a28d
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-econometrics-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
cde76ba2c7382b4876e17136c94f52aca2774e50342ab757b2a2d18da370dcb6
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_biology-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-high_school_biology": {"acc": 0.23870967741935484, "acc_norm": 0.2709677419354839, "acc_norm_stderr": 0.025284416114900152, "acc_stderr": 0.024251071262208834}}, "versions": {"hendrycksTest-high_school_biology": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_computer_science-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-high_school_computer_science": {"acc": 0.2, "acc_norm": 0.22, "acc_norm_stderr": 0.04163331998932269, "acc_stderr": 0.04020151261036845}}, "versions": {"hendrycksTest-high_school_computer_science": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_european_history-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-high_school_european_history": {"acc": 0.23636363636363636, "acc_norm": 0.24242424242424243, "acc_norm_stderr": 0.03346409881055953, "acc_stderr": 0.033175059300091805}}, "versions": {"hendrycksTest-high_school_european_history": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ce4faae2fb6628caa48f6fc74cbc848880db49e6ff51079392778a2322bcefef
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_microeconomics-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
513b998585ebc1ebdefca6435b7c84fd73dc36fc80321a22503467f04efed23e
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_world_history-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-high_school_world_history": {"acc": 0.23628691983122363, "acc_norm": 0.24472573839662448, "acc_norm_stderr": 0.02798569938703642, "acc_stderr": 0.027652153144159263}}, "versions": {"hendrycksTest-high_school_world_history": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-nutrition-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
19e49d218f55ed5ec4bd1a6cd3f3388c6f620b81484e7abe8b298e5481c3044d
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-prehistory-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
6983c560a562749f4f702249a3a6ae51fa495acc0643a980bf2cf52c6c5d4b95
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_law-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-professional_law": {"acc": 0.2561929595827901, "acc_norm": 0.2470664928292047, "acc_norm_stderr": 0.011015752255279352, "acc_stderr": 0.011149173153110582}}, "versions": {"hendrycksTest-professional_law": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_medicine-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
7a30599858398169cde61430c18efdd7fb4dcd09c34aa9baba70f0f8cf17a9f1
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_medicine-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-professional_medicine": {"acc": 0.23161764705882354, "acc_norm": 0.2536764705882353, "acc_norm_stderr": 0.02643132987078953, "acc_stderr": 0.025626533803777562}}, "versions": {"hendrycksTest-professional_medicine": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"lambada": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_mt_fr-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"lambada_mt_fr": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_fr": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_openai_cloze-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
7655e748b63ae7e9911411d2d2a2577221d6c861ca4448509992541294d689f3
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_algebra-v0-greedy_until
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
f19182ce697a2c095d9e5b56ee6659dc38c93994b69ca75d7c3d3f5fd87572b4
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_geometry-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"math_geometry": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_geometry": 0}}
|