Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/anagrams1-v0-greedy_until +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_2dm-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_animate_subject_passive-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_distractor_agreement_relative_clause-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_drop_argument-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_existential_there_subject_raising-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_only_npi_licensor_present-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_principle_A_case_2-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_scope-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_disability-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_race_color-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_race_color-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_religion-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_socioeconomic-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/gpt3_test_f307d52964c295e2005c5e782b688c24388e0cecadf29f1e6fc7f394236ea9c0.pkl +3 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hellaswag-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_computer_science-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_mathematics-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_medicine-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_medicine-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_physics-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-conceptual_physics-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-formal_logic-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-global_facts-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_psychology-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-human_aging-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-human_sexuality-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-machine_learning-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-marketing-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_scenarios-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_psychology-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_cloze-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_mt_it-v0-loglikelihood +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v1-greedy_until +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v1-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_prealgebra-v1-greedy_until +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/mrpc-v0-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/pile_books3-v1-res.json +1 -0
- scripts/yans/eval/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v0-loglikelihood_rolling +1 -0
scripts/yans/eval/lm-evaluation-harness/tests/testdata/anagrams1-v0-greedy_until
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
7c0c5246d3f751f39119a5629ac1d4b2c6fd2a315f78d6de9b2c387e24e3fef1
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_2dm-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
14ac5e510cdf82967d6827a9ca059906ee1db2e347be1b17f36403a157e73552
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_animate_subject_passive-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
064c38fcd072b8bd12f54ea4f8e41599ed4e11dc386e93b77e1fc07967d1f960
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_coordinate_structure_constraint_complex_left_branch": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_coordinate_structure_constraint_complex_left_branch": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ccc64b4d5e80c081d5161aae5828212ba49d277ca8c5a4281f181744727a6a99
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_distractor_agreement_relative_clause-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
bf78e2b53c0f3531303c668c96bd3897a0a35e960da37439e63724ecba4e371a
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_drop_argument-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
616109e63f162dcd31a632943e7ef0c9e0431afeb179e83e9b04b39007b16f5b
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_ellipsis_n_bar_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_ellipsis_n_bar_1": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_existential_there_subject_raising-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_existential_there_subject_raising": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_subject_raising": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
7084358b1b7dd7fb5ead1a58f4b499d6f7610eca897bfac25a986d0f9a91aa5d
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_only_npi_licensor_present-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
d2d0711611b5b218c6fa8c7278494749252b7868c396451919b761303556bd66
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_principle_A_case_2-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_principle_A_case_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_case_2": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_scope-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
32fcbd0a1c6e664af2751bad552587b5ca3911973b07f4fb2cf0a2acd3de5349
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
d1d3e439b2020ef5ed232bfebbcc9634adc5117e9eb61e38fdbbe2c8ea128d54
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
a142cc2a6fcd93230b650927b07367cad957b8f3f42cb4072151da53dea301df
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"blimp_wh_vs_that_no_gap_long_distance": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_no_gap_long_distance": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
77b11f4348eb8a7f57faf95c531fda01ab4bf0e729f91a82451ed8e71ec8e66d
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_english": {"likelihood_difference": 0.3367363060632734, "likelihood_difference_stderr": 0.005827747024053628, "pct_stereotype": 0.5062611806797853, "pct_stereotype_stderr": 0.012212341600228745}}, "versions": {"crows_pairs_english": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_disability-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
90c1bcfdeec0ff51d891ee8cf00ae2a5ec61bab6739faea9865809b8ffed2cdb
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_race_color-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0a750596d77cd96502dc414ff699a399b1b91c2078adeec1d3dd982b3d591089
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_race_color-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"crows_pairs_french_race_color": {"likelihood_difference": 0.33233909422443764, "likelihood_difference_stderr": 0.010623405969915857, "pct_stereotype": 0.4782608695652174, "pct_stereotype_stderr": 0.023315932363473738}}, "versions": {"crows_pairs_french_race_color": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_religion-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
8af6445eeb634dad5f0723e40615afe993e1e3f129a4f314fe4117e633c2efd3
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_socioeconomic-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
8ba0a525c65f795c99f6416e70c998e75e4b6cc43bf9a4bd7ccacd3c3591e9cb
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/gpt3_test_f307d52964c295e2005c5e782b688c24388e0cecadf29f1e6fc7f394236ea9c0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f11de4b3d45d1590ba78935e824ae86ef75bbc370df500f89dde2c397d11c01a
|
3 |
+
size 1297
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hellaswag-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
abb808c97d6529eda6c11067837a132c62d25cba0394d720f80cca6df9f7196e
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_computer_science-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-college_computer_science": {"acc": 0.22, "acc_norm": 0.24, "acc_norm_stderr": 0.04292346959909282, "acc_stderr": 0.041633319989322695}}, "versions": {"hendrycksTest-college_computer_science": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_mathematics-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-college_mathematics": {"acc": 0.18, "acc_norm": 0.2, "acc_norm_stderr": 0.04020151261036844, "acc_stderr": 0.038612291966536955}}, "versions": {"hendrycksTest-college_mathematics": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_medicine-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
dd6e0a9be1407890e9f8cd4434fb6aa4752ab3d2473837fd465ad99f60ad685e
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_medicine-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-college_medicine": {"acc": 0.27167630057803466, "acc_norm": 0.2543352601156069, "acc_norm_stderr": 0.0332055644308557, "acc_stderr": 0.03391750322321659}}, "versions": {"hendrycksTest-college_medicine": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_physics-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-college_physics": {"acc": 0.23529411764705882, "acc_norm": 0.23529411764705882, "acc_norm_stderr": 0.04220773659171453, "acc_stderr": 0.04220773659171452}}, "versions": {"hendrycksTest-college_physics": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-conceptual_physics-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
622f191ccfc7a597d99f39897ebe3f95a9ddce0e662fcfb411aa554b289bb355
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-formal_logic-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-formal_logic": {"acc": 0.25396825396825395, "acc_norm": 0.2698412698412698, "acc_norm_stderr": 0.03970158273235172, "acc_stderr": 0.03893259610604674}}, "versions": {"hendrycksTest-formal_logic": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-global_facts-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-global_facts": {"acc": 0.23, "acc_norm": 0.23, "acc_norm_stderr": 0.04229525846816507, "acc_stderr": 0.04229525846816507}}, "versions": {"hendrycksTest-global_facts": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-high_school_macroeconomics": {"acc": 0.2230769230769231, "acc_norm": 0.22564102564102564, "acc_norm_stderr": 0.021193632525148522, "acc_stderr": 0.021107730127244}}, "versions": {"hendrycksTest-high_school_macroeconomics": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_psychology-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0e4c8d13806d3696167e40544d2d114c557c10c74bc61fcb9c51bbfced0266ef
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-human_aging-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0880b3a78f8d7b17ffc612031427b9085367cf65dabe2a68c4b64e3171d17e88
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-human_sexuality-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
4b07922fa1d549b655c21440b13d869263ce7dd9771d8147c450f11c91d26c10
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-machine_learning-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-machine_learning": {"acc": 0.23214285714285715, "acc_norm": 0.22321428571428573, "acc_norm_stderr": 0.039523019677025116, "acc_stderr": 0.04007341809755806}}, "versions": {"hendrycksTest-machine_learning": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-marketing-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-marketing": {"acc": 0.2863247863247863, "acc_norm": 0.2905982905982906, "acc_norm_stderr": 0.029745048572674043, "acc_stderr": 0.029614323690456648}}, "versions": {"hendrycksTest-marketing": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_scenarios-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-moral_scenarios": {"acc": 0.2547486033519553, "acc_norm": 0.25251396648044694, "acc_norm_stderr": 0.014530330201468654, "acc_stderr": 0.014572650383409158}}, "versions": {"hendrycksTest-moral_scenarios": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-professional_psychology-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"hendrycksTest-professional_psychology": {"acc": 0.27124183006535946, "acc_norm": 0.2826797385620915, "acc_norm_stderr": 0.01821726955205344, "acc_stderr": 0.01798661530403031}}, "versions": {"hendrycksTest-professional_psychology": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_cloze-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"lambada_cloze": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_cloze": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_mt_it-v0-loglikelihood
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
fd87c6c5cf4e0499c5f9f80e5bd7ee6a4f3d2991902a0cc3ec9e6eaf22d6760a
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v1-greedy_until
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
2aa9ae43ee9dbb2457525247d7b65358632c5eaa9cbfc40cf95a4f17f5d942ad
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v1-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"math_counting_and_prob": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_counting_and_prob": 1}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/math_prealgebra-v1-greedy_until
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
752cdf343d7152e476b0273065024f6ea0e0f47ea385c6bdf9067736cb39724a
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"mnli_mismatched": {"acc": 0.3360455655004068, "acc_stderr": 0.004763973908606819}}, "versions": {"mnli_mismatched": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/mrpc-v0-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"mrpc": {"acc": 0.5392156862745098, "acc_stderr": 0.024707732873723128, "f1": 0.5982905982905982, "f1_stderr": 0.028928325246283727}}, "versions": {"mrpc": 0}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/pile_books3-v1-res.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": {"pile_books3": {"bits_per_byte": 1.2901280503011222e-06, "byte_perplexity": 1.0000008942490204, "word_perplexity": 1.0000052870063607}}, "versions": {"pile_books3": 1}}
|
scripts/yans/eval/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v0-loglikelihood_rolling
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
d5b7967c0ece8b816f3921a8bd0fad23365349e935b491595e2ad1135af42da6
|