koichi12 committed
Commit 852d1e7 · verified · 1 parent: c52db42

Add files using upload-large-folder tool
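
For context: the commit message references Hugging Face's upload-large-folder tool. A minimal sketch of how an upload like this one might be reproduced with huggingface_hub's upload_large_folder API follows; the repo_id and folder_path are illustrative placeholders, not values taken from this commit.

# Sketch: mirror a local folder into a Hub repo with upload_large_folder,
# which chunks, parallelizes, and resumes large uploads.
from huggingface_hub import HfApi

api = HfApi()
api.upload_large_folder(
    repo_id="koichi12/example-repo",  # placeholder; actual repo not shown here
    repo_type="model",                # upload_large_folder requires repo_type
    folder_path="scripts/yans",       # local folder to mirror into the repo
)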

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. scripts/yans/eval/lm-evaluation-harness/tests/testdata/anagrams1-v0-res.json +1 -0
  2. scripts/yans/eval/lm-evaluation-harness/tests/testdata/arc_easy-v0-loglikelihood +1 -0
  3. scripts/yans/eval/lm-evaluation-harness/tests/testdata/arc_easy-v0-res.json +1 -0
  4. scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-res.json +1 -0
  5. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood +1 -0
  6. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_causative-v0-res.json +1 -0
  7. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-res.json +1 -0
  8. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json +1 -0
  9. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-res.json +1 -0
  10. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json +1 -0
  11. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood +1 -0
  12. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_principle_A_domain_2-v0-res.json +1 -0
  13. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood +1 -0
  14. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-res.json +1 -0
  15. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-res.json +1 -0
  16. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_island-v0-loglikelihood +1 -0
  17. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood +1 -0
  18. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-res.json +1 -0
  19. scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood +1 -0
  20. scripts/yans/eval/lm-evaluation-harness/tests/testdata/cb-v0-res.json +1 -0
  21. scripts/yans/eval/lm-evaluation-harness/tests/testdata/cb-v1-res.json +1 -0
  22. scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english-v0-loglikelihood +1 -0
  23. scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_disability-v0-res.json +1 -0
  24. scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_physical_appearance-v0-loglikelihood +1 -0
  25. scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_race_color-v0-res.json +1 -0
  26. scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_religion-v0-loglikelihood +1 -0
  27. scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_physical_appearance-v0-loglikelihood +1 -0
  28. scripts/yans/eval/lm-evaluation-harness/tests/testdata/cycle_letters-v0-greedy_until +1 -0
  29. scripts/yans/eval/lm-evaluation-harness/tests/testdata/ethics_utilitarianism-v0-res.json +1 -0
  30. scripts/yans/eval/lm-evaluation-harness/tests/testdata/ethics_utilitarianism_original-v0-loglikelihood +1 -0
  31. scripts/yans/eval/lm-evaluation-harness/tests/testdata/ethics_utilitarianism_original-v0-res.json +1 -0
  32. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hellaswag-v0-res.json +1 -0
  33. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-business_ethics-v0-loglikelihood +1 -0
  34. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_biology-v0-loglikelihood +1 -0
  35. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_biology-v0-res.json +1 -0
  36. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_computer_science-v0-loglikelihood +1 -0
  37. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-computer_security-v0-res.json +1 -0
  38. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-elementary_mathematics-v0-res.json +1 -0
  39. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json +1 -0
  40. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood +1 -0
  41. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_disputes-v0-res.json +1 -0
  42. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_scenarios-v0-loglikelihood +1 -0
  43. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-philosophy-v0-res.json +1 -0
  44. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-public_relations-v0-res.json +1 -0
  45. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-sociology-v0-res.json +1 -0
  46. scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-virology-v0-res.json +1 -0
  47. scripts/yans/eval/lm-evaluation-harness/tests/testdata/iwslt17-en-ar-v0-res.json +1 -0
  48. scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_mt_es-v0-res.json +1 -0
  49. scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_openai-v0-res.json +1 -0
  50. scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_openai_mt_en-v0-res.json +1 -0
scripts/yans/eval/lm-evaluation-harness/tests/testdata/anagrams1-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"anagrams1": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams1": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arc_easy-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ ffa6e39a35a16299dcb015f17f986aaa598ad8b4840c4cebe0339a7042232741
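Each *-loglikelihood (and *-greedy_until) fixture holds a single 64-character hex string, the length of a SHA-256 digest, presumably a checksum of the test's serialized requests. A hedged sketch of checking such a fixture; the assumption that the digest is SHA-256 over some canonical serialization is mine, not stated in this diff:

import hashlib

def requests_digest(serialized_requests: str) -> str:
    # Assumption: the fixture stores a SHA-256 hex digest of the serialized
    # requests; the exact serialization format is not shown in this diff.
    return hashlib.sha256(serialized_requests.encode("utf-8")).hexdigest()

path = "scripts/yans/eval/lm-evaluation-harness/tests/testdata/arc_easy-v0-loglikelihood"
expected = open(path).read().strip()
assert len(expected) == 64  # consistent with a SHA-256 hex digest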
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arc_easy-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"arc_easy": {"acc": 0.2474747474747475, "acc_norm": 0.24074074074074073, "acc_norm_stderr": 0.008772796145221907, "acc_stderr": 0.008855114414834707}}, "versions": {"arc_easy": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"arithmetic_3da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_3da": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 2a84231e7b79f517427e57e2099c88fed3d60a7efab4ef9506e263b4091d5cfa
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_causative-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_causative": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_causative": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_determiner_noun_agreement_with_adj_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adj_2": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_existential_there_quantifiers_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_quantifiers_1": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_irregular_plural_subject_verb_agreement_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_plural_subject_verb_agreement_2": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_matrix_question_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_matrix_question_npi_licensor_present": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 7c2ed82612af9175052cd44d8e178b6dd084c04eb462a3d88fcacfad2df8be8e
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_principle_A_domain_2-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_principle_A_domain_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_2": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 5bc0441f31e32443cf761bca6e961d504e1e84b15aa4e1d79e5c8ed5b4c2aa3a
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_sentential_negation_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_sentential_negation_npi_licensor_present": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_transitive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_transitive": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_island-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 91a9e4b60b0f3572a7fdbd7648d0e69f36e5eb34db715315b0082558d7ed8b65
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 4d4aaa0274ccd485ff8430ed61b8f83806febe18c16616c7d050f637a0463eba
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"blimp_wh_questions_subject_gap_long_distance": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_questions_subject_gap_long_distance": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ d41a9b85e4c31e445bf9b46b8642df02203ccc02b4a9b254bf76066d5c54b4b7
scripts/yans/eval/lm-evaluation-harness/tests/testdata/cb-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"cb": {"acc": 0.3392857142857143, "acc_stderr": 0.06384226561930825, "f1": 0.2819143819143819}}, "versions": {"cb": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/cb-v1-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"cb": {"acc": 0.3392857142857143, "acc_stderr": 0.06384226561930825, "f1": 0.2819143819143819}}, "versions": {"cb": 1}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ ee3ce1ddb8071d4189e5b06e7f3c618a434221ac52935d0f434c4d183f01458a
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_disability-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"crows_pairs_english_disability": {"likelihood_difference": 0.3148684792547637, "likelihood_difference_stderr": 0.02800803147051987, "pct_stereotype": 0.36923076923076925, "pct_stereotype_stderr": 0.06032456592830047}}, "versions": {"crows_pairs_english_disability": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_physical_appearance-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ d1823f5038afafa7a5338e42531720480c8ccf4e177789526caf294d52d56e89
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_race_color-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"crows_pairs_english_race_color": {"likelihood_difference": 0.3322827903840805, "likelihood_difference_stderr": 0.01019838186372816, "pct_stereotype": 0.4822834645669291, "pct_stereotype_stderr": 0.022191835500120254}}, "versions": {"crows_pairs_english_race_color": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_english_religion-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 2ed57377174adaf0fb30037eb055eafdd02cd46e57bc32066d5fecd90a14b6e1
scripts/yans/eval/lm-evaluation-harness/tests/testdata/crows_pairs_french_physical_appearance-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ ea61eaad64e9292790d4bbef955ffeebed7a595de098bc5ac726a6e51f27f9af
scripts/yans/eval/lm-evaluation-harness/tests/testdata/cycle_letters-v0-greedy_until ADDED
@@ -0,0 +1 @@
+ eb23f7d5de7528eefd8ed5f8054c402ff947319cccfef7195995946f99389201
scripts/yans/eval/lm-evaluation-harness/tests/testdata/ethics_utilitarianism-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"ethics_utilitarianism": {"acc": 0.49771214642262895, "acc_stderr": 0.007211546310787838}}, "versions": {"ethics_utilitarianism": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/ethics_utilitarianism_original-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 5b42ba1faf5ece6a6ec9a3976ce79c1fac8df5b98272aab85457188c2142693c
scripts/yans/eval/lm-evaluation-harness/tests/testdata/ethics_utilitarianism_original-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"ethics_utilitarianism_original": {"acc": 0.5214226289517471, "acc_stderr": 0.007204999520618661}}, "versions": {"ethics_utilitarianism_original": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hellaswag-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hellaswag": {"acc": 0.24965146385182235, "acc_norm": 0.24756024696275641, "acc_norm_stderr": 0.004307128573285236, "acc_stderr": 0.004319267432460666}}, "versions": {"hellaswag": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-business_ethics-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ b3b27e9dbad587377d3c8cab1072782de883e245da93a563bd8b3099017b1fc0
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_biology-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ c29e4e67ff91af29b9434884874414d1b1b32ccc32903c6b1639469b19907419
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_biology-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-college_biology": {"acc": 0.24305555555555555, "acc_norm": 0.2361111111111111, "acc_norm_stderr": 0.03551446610810826, "acc_stderr": 0.03586879280080341}}, "versions": {"hendrycksTest-college_biology": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-college_computer_science-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 4ea26ad780290429ac5a3317559c154848d662bd40532c966458ba6f2a32d0a3
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-computer_security-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-computer_security": {"acc": 0.24, "acc_norm": 0.27, "acc_norm_stderr": 0.044619604333847394, "acc_stderr": 0.042923469599092816}}, "versions": {"hendrycksTest-computer_security": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-elementary_mathematics-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-elementary_mathematics": {"acc": 0.2724867724867725, "acc_norm": 0.2830687830687831, "acc_norm_stderr": 0.023201392938194978, "acc_stderr": 0.022930973071633345}}, "versions": {"hendrycksTest-elementary_mathematics": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-high_school_statistics": {"acc": 0.2962962962962963, "acc_norm": 0.3055555555555556, "acc_norm_stderr": 0.03141554629402544, "acc_stderr": 0.03114144782353604}}, "versions": {"hendrycksTest-high_school_statistics": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ 1c8b994bd9a63ec874fc8d0e3a27077118b7adc472306b2fd6c55635a78b9d52
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_disputes-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-moral_disputes": {"acc": 0.24855491329479767, "acc_norm": 0.27167630057803466, "acc_norm_stderr": 0.023948512905468365, "acc_stderr": 0.023267528432100174}}, "versions": {"hendrycksTest-moral_disputes": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_scenarios-v0-loglikelihood ADDED
@@ -0,0 +1 @@
+ a8e1882e77728b53c8b86312254d08320d8363fb606d746a8dd145b812f62cf5
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-philosophy-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-philosophy": {"acc": 0.26366559485530544, "acc_norm": 0.2733118971061093, "acc_norm_stderr": 0.02531176597542612, "acc_stderr": 0.02502553850053234}}, "versions": {"hendrycksTest-philosophy": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-public_relations-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-public_relations": {"acc": 0.3090909090909091, "acc_norm": 0.2636363636363636, "acc_norm_stderr": 0.04220224692971987, "acc_stderr": 0.044262946482000985}}, "versions": {"hendrycksTest-public_relations": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-sociology-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-sociology": {"acc": 0.23383084577114427, "acc_norm": 0.24875621890547264, "acc_norm_stderr": 0.030567675938916707, "acc_stderr": 0.02992941540834838}}, "versions": {"hendrycksTest-sociology": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/hendrycksTest-virology-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"hendrycksTest-virology": {"acc": 0.27710843373493976, "acc_norm": 0.2710843373493976, "acc_norm_stderr": 0.03460579907553027, "acc_stderr": 0.034843315926805875}}, "versions": {"hendrycksTest-virology": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/iwslt17-en-ar-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"iwslt17-en-ar": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.0, "chrf_stderr": 0.0, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"iwslt17-en-ar": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_mt_es-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"lambada_mt_es": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_es": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_openai-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"lambada_openai": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_openai": 0}}
scripts/yans/eval/lm-evaluation-harness/tests/testdata/lambada_openai_mt_en-v0-res.json ADDED
@@ -0,0 +1 @@
+ {"results": {"lambada_openai_mt_en": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_openai_mt_en": 0}}