- 0 babi_nli/counting - 1 babi_nli/indefinite-knowledge - 2 babi_nli/simple-negation - 3 babi_nli/three-arg-relations - 4 babi_nli/basic-induction - 5 babi_nli/time-reasoning - 6 babi_nli/compound-coreference - 7 babi_nli/path-finding - 8 babi_nli/positional-reasoning - 9 babi_nli/conjunction - 10 babi_nli/size-reasoning - 11 babi_nli/yes-no-questions - 12 babi_nli/basic-coreference - 13 babi_nli/two-supporting-facts - 14 babi_nli/lists-sets - 15 babi_nli/two-arg-relations - 16 babi_nli/three-supporting-facts - 17 babi_nli/basic-deduction - 18 babi_nli/single-supporting-fact - 19 anli/a1 - 20 anli/a2 - 21 anli/a3 - 22 sick/label - 23 sick/relatedness - 24 sick/entailment_AB - 25 sick/entailment_BA - 26 snli - 27 scitail/snli_format - 28 hans - 29 WANLI - 30 recast/recast_kg_relations - 31 recast/recast_puns - 32 recast/recast_factuality - 33 recast/recast_megaveridicality - 34 recast/recast_verbcorner - 35 recast/recast_verbnet - 36 recast/recast_ner - 37 recast/recast_sentiment - 38 probability_words_nli/usnli - 39 probability_words_nli/reasoning_1hop - 40 probability_words_nli/reasoning_2hop - 41 nan-nli/joey234--nan-nli - 42 nli_fever - 43 breaking_nli - 44 conj_nli - 45 fracas - 46 dialogue_nli - 47 mpe - 48 dnc - 49 gpt3_nli - 50 recast_white/fnplus - 51 recast_white/sprl - 52 recast_white/dpr - 53 joci - 54 contrast_nli - 55 robust_nli/IS_CS - 56 robust_nli/LI_LI - 57 robust_nli/ST_WO - 58 robust_nli/PI_SP - 59 robust_nli/PI_CD - 60 robust_nli/ST_SE - 61 robust_nli/ST_NE - 62 robust_nli/ST_LM - 63 robust_nli_is_sd - 64 robust_nli_li_ts - 65 gen_debiased_nli/snli_seq_z - 66 gen_debiased_nli/snli_z_aug - 67 gen_debiased_nli/snli_par_z - 68 gen_debiased_nli/mnli_par_z - 69 gen_debiased_nli/mnli_z_aug - 70 gen_debiased_nli/mnli_seq_z - 71 add_one_rte - 72 imppres/presupposition_cleft_uniqueness/presupposition - 73 imppres/presupposition_possessed_definites_uniqueness/presupposition - 74 imppres/presupposition_possessed_definites_existence/presupposition - 75 imppres/presupposition_only_presupposition/presupposition - 76 imppres/presupposition_all_n_presupposition/presupposition - 77 imppres/presupposition_both_presupposition/presupposition - 78 imppres/presupposition_change_of_state/presupposition - 79 imppres/presupposition_cleft_existence/presupposition - 80 imppres/presupposition_question_presupposition/presupposition - 81 imppres/implicature_modals/prag - 82 imppres/implicature_numerals_10_100/prag - 83 imppres/implicature_numerals_2_3/prag - 84 imppres/implicature_gradable_adjective/prag - 85 imppres/implicature_quantifiers/prag - 86 imppres/implicature_gradable_verb/prag - 87 imppres/implicature_connectives/prag - 88 imppres/implicature_gradable_adjective/log - 89 imppres/implicature_gradable_verb/log - 90 imppres/implicature_numerals_2_3/log - 91 imppres/implicature_numerals_10_100/log - 92 imppres/implicature_modals/log - 93 imppres/implicature_quantifiers/log - 94 imppres/implicature_connectives/log - 95 glue_diagnostics/diagnostics - 96 hlgd - 97 paws/labeled_final - 98 paws/labeled_swap - 99 quora - 100 medical_questions_pairs - 101 conll2003/pos_tags - 102 conll2003/chunk_tags - 103 conll2003/ner_tags - 104 hh-rlhf - 105 model-written-evals - 106 truthful_qa/multiple_choice - 107 fig-qa - 108 bigbench/fantasy_reasoning - 109 bigbench/nonsense_words_grammar - 110 bigbench/analytic_entailment - 111 bigbench/logic_grid_puzzle - 112 bigbench/geometric_shapes - 113 bigbench/key_value_maps - 114 bigbench/analogical_similarity - 115 bigbench/metaphor_understanding - 116 bigbench/metaphor_boolean - 117 bigbench/ruin_names - 118 bigbench/cs_algorithms - 119 bigbench/physical_intuition - 120 bigbench/mnist_ascii - 121 bigbench/moral_permissibility - 122 bigbench/emoji_movie - 123 bigbench/snarks - 124 bigbench/timedial - 125 bigbench/dark_humor_detection - 126 bigbench/gre_reading_comprehension - 127 bigbench/empirical_judgments - 128 bigbench/causal_judgment - 129 bigbench/fact_checker - 130 bigbench/logical_fallacy_detection - 131 bigbench/identify_math_theorems - 132 bigbench/dyck_languages - 133 bigbench/winowhy - 134 bigbench/logical_sequence - 135 bigbench/strategyqa - 136 bigbench/unit_interpretation - 137 bigbench/authorship_verification - 138 bigbench/undo_permutation - 139 bigbench/epistemic_reasoning - 140 bigbench/human_organs_senses - 141 bigbench/misconceptions - 142 bigbench/international_phonetic_alphabet_nli - 143 bigbench/identify_odd_metaphor - 144 bigbench/mathematical_induction - 145 bigbench/odd_one_out - 146 bigbench/reasoning_about_colored_objects - 147 bigbench/strange_stories - 148 bigbench/evaluating_information_essentiality - 149 bigbench/figure_of_speech_detection - 150 bigbench/english_proverbs - 151 bigbench/general_knowledge - 152 bigbench/tracking_shuffled_objects - 153 bigbench/physics - 154 bigbench/anachronisms - 155 bigbench/simple_ethical_questions - 156 bigbench/logical_args - 157 bigbench/suicide_risk - 158 bigbench/sentence_ambiguity - 159 bigbench/temporal_sequences - 160 bigbench/penguins_in_a_table - 161 bigbench/sports_understanding - 162 bigbench/hyperbaton - 163 bigbench/code_line_description - 164 bigbench/question_selection - 165 bigbench/disambiguation_qa - 166 bigbench/date_understanding - 167 bigbench/play_dialog_same_or_different - 168 bigbench/salient_translation_error_detection - 169 bigbench/irony_identification - 170 bigbench/emojis_emotion_prediction - 171 bigbench/hindu_knowledge - 172 bigbench/conceptual_combinations - 173 bigbench/implicatures - 174 bigbench/movie_dialog_same_or_different - 175 bigbench/social_support - 176 bigbench/presuppositions_as_nli - 177 bigbench/vitaminc_fact_verification - 178 bigbench/hhh_alignment - 179 bigbench/implicit_relations - 180 bigbench/bbq_lite_json - 181 bigbench/phrase_relatedness - 182 bigbench/logical_deduction - 183 bigbench/discourse_marker_prediction - 184 bigbench/movie_recommendation - 185 bigbench/real_or_fake_text - 186 bigbench/formal_fallacies_syllogisms_negation - 187 bigbench/crass_ai - 188 blimp/inchoative - 189 blimp/principle_A_c_command - 190 blimp/matrix_question_npi_licensor_present - 191 blimp/wh_questions_subject_gap_long_distance - 192 blimp/sentential_subject_island - 193 blimp/existential_there_quantifiers_2 - 194 blimp/sentential_negation_npi_scope - 195 blimp/complex_NP_island - 196 blimp/principle_A_reconstruction - 197 blimp/animate_subject_passive - 198 blimp/tough_vs_raising_1 - 199 blimp/wh_vs_that_with_gap - 200 blimp/principle_A_domain_2 - 201 blimp/npi_present_1 - 202 blimp/wh_vs_that_with_gap_long_distance - 203 blimp/superlative_quantifiers_1 - 204 blimp/npi_present_2 - 205 blimp/wh_questions_object_gap - 206 blimp/coordinate_structure_constraint_complex_left_branch - 207 blimp/coordinate_structure_constraint_object_extraction - 208 blimp/left_branch_island_echo_question - 209 blimp/drop_argument - 210 cos_e/v1.0 - 211 cosmos_qa - 212 dream - 213 openbookqa - 214 qasc - 215 quartz - 216 quail - 217 head_qa/en - 218 sciq - 219 social_i_qa - 220 wiki_hop - 221 wiqa - 222 piqa - 223 hellaswag - 224 super_glue/copa - 225 art - 226 hendrycks_test/moral_disputes - 227 hendrycks_test/moral_scenarios - 228 hendrycks_test/nutrition - 229 hendrycks_test/philosophy - 230 hendrycks_test/prehistory - 231 hendrycks_test/professional_accounting - 232 hendrycks_test/professional_law - 233 hendrycks_test/world_religions - 234 hendrycks_test/professional_psychology - 235 hendrycks_test/public_relations - 236 hendrycks_test/security_studies - 237 hendrycks_test/sociology - 238 hendrycks_test/us_foreign_policy - 239 hendrycks_test/virology - 240 hendrycks_test/miscellaneous - 241 hendrycks_test/professional_medicine - 242 hendrycks_test/medical_genetics - 243 hendrycks_test/college_mathematics - 244 hendrycks_test/management - 245 hendrycks_test/high_school_computer_science - 246 hendrycks_test/astronomy - 247 hendrycks_test/high_school_chemistry - 248 hendrycks_test/high_school_biology - 249 hendrycks_test/global_facts - 250 hendrycks_test/formal_logic - 251 hendrycks_test/elementary_mathematics - 252 hendrycks_test/high_school_european_history - 253 hendrycks_test/electrical_engineering - 254 hendrycks_test/conceptual_physics - 255 hendrycks_test/computer_security - 256 hendrycks_test/college_physics - 257 hendrycks_test/college_medicine - 258 hendrycks_test/college_computer_science - 259 hendrycks_test/college_chemistry - 260 hendrycks_test/college_biology - 261 hendrycks_test/econometrics - 262 hendrycks_test/clinical_knowledge - 263 hendrycks_test/anatomy - 264 hendrycks_test/marketing - 265 hendrycks_test/machine_learning - 266 hendrycks_test/logical_fallacies - 267 hendrycks_test/jurisprudence - 268 hendrycks_test/international_law - 269 hendrycks_test/human_sexuality - 270 hendrycks_test/human_aging - 271 hendrycks_test/high_school_world_history - 272 hendrycks_test/abstract_algebra - 273 hendrycks_test/high_school_us_history - 274 hendrycks_test/high_school_psychology - 275 hendrycks_test/high_school_physics - 276 hendrycks_test/high_school_microeconomics - 277 hendrycks_test/high_school_mathematics - 278 hendrycks_test/high_school_macroeconomics - 279 hendrycks_test/high_school_government_and_politics - 280 hendrycks_test/high_school_geography - 281 hendrycks_test/high_school_statistics - 282 hendrycks_test/business_ethics - 283 winogrande/winogrande_xl - 284 codah/codah - 285 ai2_arc/ARC-Challenge/challenge - 286 ai2_arc/ARC-Easy/challenge - 287 definite_pronoun_resolution - 288 swag - 289 math_qa - 290 utilitarianism - 291 TuringBench - 292 trec - 293 vitaminc/tals--vitaminc - 294 hope_edi/english - 295 rumoureval_2019/RumourEval2019 - 296 ethos/binary - 297 ethos/multilabel - 298 glue/cola - 299 glue/sst2 - 300 glue/mrpc - 301 glue/qqp - 302 glue/stsb - 303 glue/mnli - 304 glue/qnli - 305 glue/rte - 306 glue/wnli - 307 super_glue/boolq - 308 super_glue/cb - 309 super_glue/multirc - 310 super_glue/wic - 311 super_glue/axg - 312 tweet_eval/stance_feminist - 313 tweet_eval/stance_atheism - 314 tweet_eval/stance_hillary - 315 tweet_eval/stance_abortion - 316 tweet_eval/sentiment - 317 tweet_eval/offensive - 318 tweet_eval/stance_climate - 319 tweet_eval/irony - 320 tweet_eval/emotion - 321 tweet_eval/emoji - 322 tweet_eval/hate - 323 discovery/discovery - 324 pragmeval/switchboard - 325 pragmeval/squinky-informativeness - 326 pragmeval/emobank-arousal - 327 pragmeval/emobank-dominance - 328 pragmeval/emobank-valence - 329 pragmeval/mrda - 330 pragmeval/verifiability - 331 pragmeval/squinky-implicature - 332 pragmeval/squinky-formality - 333 pragmeval/gum - 334 pragmeval/emergent - 335 pragmeval/persuasiveness-premisetype - 336 pragmeval/pdtb - 337 pragmeval/persuasiveness-eloquence - 338 pragmeval/persuasiveness-specificity - 339 pragmeval/persuasiveness-strength - 340 pragmeval/sarcasm - 341 pragmeval/stac - 342 pragmeval/persuasiveness-claimtype - 343 pragmeval/persuasiveness-relevance - 344 lex_glue/eurlex - 345 lex_glue/scotus - 346 lex_glue/ledgar - 347 lex_glue/unfair_tos - 348 lex_glue/case_hold - 349 imdb - 350 rotten_tomatoes - 351 ag_news - 352 yelp_review_full/yelp_review_full - 353 financial_phrasebank/sentences_allagree - 354 poem_sentiment - 355 dbpedia_14/dbpedia_14 - 356 amazon_polarity/amazon_polarity - 357 app_reviews - 358 hate_speech18 - 359 sms_spam - 360 humicroedit/subtask-1 - 361 humicroedit/subtask-2 - 362 snips_built_in_intents - 363 banking77 - 364 hate_speech_offensive - 365 hyperpartisan_news_detection/byarticle - 366 hyperpartisan_news_detection/bypublisher - 367 go_emotions/simplified - 368 scicite - 369 liar - 370 lexical_relation_classification/ROOT09 - 371 lexical_relation_classification/EVALution - 372 lexical_relation_classification/CogALexV - 373 lexical_relation_classification/BLESS - 374 lexical_relation_classification/K&H+N - 375 linguisticprobing/coordination_inversion - 376 linguisticprobing/odd_man_out - 377 linguisticprobing/word_content - 378 linguisticprobing/obj_number - 379 linguisticprobing/past_present - 380 linguisticprobing/tree_depth - 381 linguisticprobing/sentence_length - 382 linguisticprobing/top_constituents - 383 linguisticprobing/bigram_shift - 384 linguisticprobing/subj_number - 385 crowdflower/sentiment_nuclear_power - 386 crowdflower/tweet_global_warming - 387 crowdflower/airline-sentiment - 388 crowdflower/economic-news - 389 crowdflower/political-media-audience - 390 crowdflower/political-media-bias - 391 crowdflower/political-media-message - 392 crowdflower/text_emotion - 393 crowdflower/corporate-messaging - 394 ethics/commonsense - 395 ethics/deontology - 396 ethics/justice - 397 ethics/virtue - 398 emo/emo2019 - 399 google_wellformed_query - 400 tweets_hate_speech_detection - 401 adv_glue/adv_sst2 - 402 adv_glue/adv_qqp - 403 adv_glue/adv_mnli - 404 adv_glue/adv_mnli_mismatched - 405 adv_glue/adv_qnli - 406 adv_glue/adv_rte - 407 has_part - 408 wnut_17/wnut_17 - 409 ncbi_disease/ncbi_disease - 410 acronym_identification - 411 jnlpba/jnlpba - 412 species_800/species_800 - 413 ontonotes_english/SpeedOfMagic--ontonotes_english - 414 blog_authorship_corpus/gender - 415 blog_authorship_corpus/age - 416 blog_authorship_corpus/horoscope - 417 blog_authorship_corpus/job - 418 open_question_type - 419 health_fact - 420 commonsense_qa - 421 mc_taco - 422 ade_corpus_v2/Ade_corpus_v2_classification - 423 discosense - 424 circa - 425 code_x_glue_cc_defect_detection - 426 code_x_glue_cc_clone_detection_big_clone_bench - 427 code_x_glue_cc_code_refinement/medium - 428 EffectiveFeedbackStudentWriting - 429 promptSentiment - 430 promptNLI - 431 promptSpoke - 432 promptProficiency - 433 promptGrammar - 434 promptCoherence - 435 phrase_similarity - 436 scientific-exaggeration-detection - 437 quarel - 438 fever-evidence-related/mwong--fever-related - 439 numer_sense - 440 dynasent/dynabench.dynasent.r1.all/r1 - 441 dynasent/dynabench.dynasent.r2.all/r2 - 442 Sarcasm_News_Headline - 443 sem_eval_2010_task_8