# Group definition bundling the AGIEval tasks that are categorized as English.
group: agieval_en
task:
  - "agieval_aqua_rat"
  # Categorized as EN because the AGIEval codebase lists it in `english_qa_tasks`.
  - "agieval_gaokao_english"
  - "agieval_logiqa_en"
  - "agieval_lsat_ar"
  - "agieval_lsat_lr"
  - "agieval_lsat_rc"
  - "agieval_math"
  - "agieval_sat_en_without_passage"
  - "agieval_sat_en"
  - "agieval_sat_math"
# Group-level score: mean of the member tasks' `acc`, with `weight_by_size`
# enabled (presumably weighting each task by its number of documents).
aggregate_metric_list:
  - metric: acc
    aggregation: mean
    weight_by_size: true
metadata:
  version: 0.0
100644 index 0000000000000000000000000000000000000000..e0d97a515559d9eaecca8bc73949a1d6886b1922 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-mathqa.yaml @@ -0,0 +1,3 @@ +include: aqua-rat.yaml +task: agieval_gaokao_mathqa +dataset_path: hails/agieval-gaokao-mathqa diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43a047edafd06ab29c666741dd4f28560c64eff9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml @@ -0,0 +1,3 @@ +include: aqua-rat.yaml +task: agieval_gaokao_physics +dataset_path: hails/agieval-gaokao-physics diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bd1dff40b0017ee23067cd20bc8543eaf8081b2 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml @@ -0,0 +1,3 @@ +include: aqua-rat.yaml +task: agieval_logiqa_en +dataset_path: hails/agieval-logiqa-en diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ca9198b53240e04e69a617274f93964be067539 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml @@ -0,0 +1,3 @@ +include: aqua-rat.yaml +task: agieval_logiqa_zh +dataset_path: hails/agieval-logiqa-zh diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23a9dce7d3853af35091d0bd32df1dbd481ab7aa --- 
/dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml @@ -0,0 +1,3 @@ +include: aqua-rat.yaml +task: agieval_lsat_rc +dataset_path: hails/agieval-lsat-rc diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..793d48aec2228daef7431f846e3166de0e12a602 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml @@ -0,0 +1,3 @@ +include: aqua-rat.yaml +task: agieval_sat_en +dataset_path: hails/agieval-sat-en diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d3877a23866e75bd666b877c1225b956a226ba81 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md @@ -0,0 +1,52 @@ +# BLiMP + +### Paper + +Title: `BLiMP: The Benchmark of Linguistic Minimal Pairs for English` +Abstract: `https://arxiv.org/abs/1912.00582` + +BLiMP is a challenge set for evaluating what language models (LMs) know about +major grammatical phenomena in English. BLiMP consists of 67 sub-datasets, each +containing 1000 minimal pairs isolating specific contrasts in syntax, morphology, +or semantics. The data is automatically generated according to expert-crafted +grammars. 
+ +Homepage: https://github.com/alexwarstadt/blimp + + +### Citation + +``` +@article{warstadt2019blimp, + author = {Warstadt, Alex and Parrish, Alicia and Liu, Haokun and Mohananey, Anhad and Peng, Wei and Wang, Sheng-Fu and Bowman, Samuel R.}, + title = {BLiMP: The Benchmark of Linguistic Minimal Pairs for English}, + journal = {Transactions of the Association for Computational Linguistics}, + volume = {8}, + number = {}, + pages = {377-392}, + year = {2020}, + doi = {10.1162/tacl\_a\_00321}, + URL = {https://doi.org/10.1162/tacl_a_00321}, + eprint = {https://doi.org/10.1162/tacl_a_00321}, + abstract = { We introduce The Benchmark of Linguistic Minimal Pairs (BLiMP), a challenge set for evaluating the linguistic knowledge of language models (LMs) on major grammatical phenomena in English. BLiMP consists of 67 individual datasets, each containing 1,000 minimal pairs—that is, pairs of minimally different sentences that contrast in grammatical acceptability and isolate specific phenomenon in syntax, morphology, or semantics. We generate the data according to linguist-crafted grammar templates, and human aggregate agreement with the labels is 96.4\%. We evaluate n-gram, LSTM, and Transformer (GPT-2 and Transformer-XL) LMs by observing whether they assign a higher probability to the acceptable sentence in each minimal pair. We find that state-of-the-art models identify morphological contrasts related to agreement reliably, but they struggle with some subtle semantic and syntactic phenomena, such as negative polarity items and extraction islands. } +} +``` + +### Subtasks + +Tasks defined in this folder: +* `blimp`: group defined in `_blimp.yaml` that runs all 67 subtasks and reports the mean of their `acc` +* `blimp_<subtask>` (e.g. `blimp_adjunct_island`): one task per BLiMP sub-dataset; the model chooses between `sentence_good` and `sentence_bad`, and `acc` counts how often `sentence_good` is preferred + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [ ] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? 
# Group definition for BLiMP: lists every `blimp_<subtask>` task that belongs
# to the `blimp` group.
group: blimp
task:
  - "blimp_adjunct_island"
  - "blimp_anaphor_gender_agreement"
  - "blimp_anaphor_number_agreement"
  - "blimp_animate_subject_passive"
  - "blimp_animate_subject_trans"
  - "blimp_causative"
  - "blimp_complex_NP_island"
  - "blimp_coordinate_structure_constraint_complex_left_branch"
  - "blimp_coordinate_structure_constraint_object_extraction"
  - "blimp_determiner_noun_agreement_1"
  - "blimp_determiner_noun_agreement_2"
  - "blimp_determiner_noun_agreement_irregular_1"
  - "blimp_determiner_noun_agreement_irregular_2"
  - "blimp_determiner_noun_agreement_with_adj_2"
  - "blimp_determiner_noun_agreement_with_adj_irregular_1"
  - "blimp_determiner_noun_agreement_with_adj_irregular_2"
  - "blimp_determiner_noun_agreement_with_adjective_1"
  - "blimp_distractor_agreement_relational_noun"
  - "blimp_distractor_agreement_relative_clause"
  - "blimp_drop_argument"
  - "blimp_ellipsis_n_bar_1"
  - "blimp_ellipsis_n_bar_2"
  - "blimp_existential_there_object_raising"
  - "blimp_existential_there_quantifiers_1"
  - "blimp_existential_there_quantifiers_2"
  - "blimp_existential_there_subject_raising"
  - "blimp_expletive_it_object_raising"
  - "blimp_inchoative"
  - "blimp_intransitive"
  - "blimp_irregular_past_participle_adjectives"
  - "blimp_irregular_past_participle_verbs"
  - "blimp_irregular_plural_subject_verb_agreement_1"
  - "blimp_irregular_plural_subject_verb_agreement_2"
  - "blimp_left_branch_island_echo_question"
  - "blimp_left_branch_island_simple_question"
  - "blimp_matrix_question_npi_licensor_present"
  - "blimp_npi_present_1"
  - "blimp_npi_present_2"
  - "blimp_only_npi_licensor_present"
  - "blimp_only_npi_scope"
  - "blimp_passive_1"
  - "blimp_passive_2"
  - "blimp_principle_A_c_command"
  - "blimp_principle_A_case_1"
  - "blimp_principle_A_case_2"
  - "blimp_principle_A_domain_1"
  - "blimp_principle_A_domain_2"
  - "blimp_principle_A_domain_3"
  - "blimp_principle_A_reconstruction"
  - "blimp_regular_plural_subject_verb_agreement_1"
  - "blimp_regular_plural_subject_verb_agreement_2"
  - "blimp_sentential_negation_npi_licensor_present"
  - "blimp_sentential_negation_npi_scope"
  - "blimp_sentential_subject_island"
  - "blimp_superlative_quantifiers_1"
  - "blimp_superlative_quantifiers_2"
  - "blimp_tough_vs_raising_1"
  - "blimp_tough_vs_raising_2"
  - "blimp_transitive"
  - "blimp_wh_island"
  - "blimp_wh_questions_object_gap"
  - "blimp_wh_questions_subject_gap"
  - "blimp_wh_questions_subject_gap_long_distance"
  - "blimp_wh_vs_that_no_gap"
  - "blimp_wh_vs_that_no_gap_long_distance"
  - "blimp_wh_vs_that_with_gap"
  - "blimp_wh_vs_that_with_gap_long_distance"
# Group-level score: mean of the subtasks' `acc` with `weight_by_size`
# disabled (presumably every subtask counts equally - TODO confirm).
aggregate_metric_list:
  - metric: acc
    aggregation: mean
    # Canonical lowercase boolean; the value is unchanged (was `False`).
    weight_by_size: false
metadata:
  version: 2.0
# Shared settings for the BLiMP subtask configs in this folder. Each
# `<subtask>.yaml` pulls this file in with `include: _template_yaml` and adds
# its own `task` and `dataset_name`.

# Data source. The `train` split is the one used for evaluation
# (presumably the only split the dataset ships - TODO confirm).
dataset_path: blimp
validation_split: train

# Request construction: no context text, the two candidate sentences are the
# choices.
output_type: multiple_choice
doc_to_text: ""
doc_to_choice: "{{[sentence_good, sentence_bad]}}"
# Presumably an index into `doc_to_choice`, i.e. 0 marks `sentence_good` as
# the correct answer - TODO confirm against the harness.
doc_to_target: 0
num_fewshot: 0

# Decontamination query is built from both sentences of the pair.
should_decontaminate: true
doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"

# Scoring.
metric_list:
  - metric: acc
metadata:
  version: 1.0
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99118adb9f283a3dc9f5e26fa387915ed3a6a57c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: animate_subject_passive +include: _template_yaml +task: blimp_animate_subject_passive diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d15eb2c77d454ae8e2791cac85601a803f4bd785 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: animate_subject_trans +include: _template_yaml +task: blimp_animate_subject_trans diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b82ef3914b5dd34d1417964dacb0bd2f038b190 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: causative +include: _template_yaml +task: blimp_causative diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4ccfe41fa0e6e5d3b8d5b46d6f2edaac60606f9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: complex_NP_island +include: 
_template_yaml +task: blimp_complex_NP_island diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1acc7d544a1fcf6756264d1ac236c839128ff449 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: coordinate_structure_constraint_complex_left_branch +include: _template_yaml +task: blimp_coordinate_structure_constraint_complex_left_branch diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbcd6ae9c006dd52b37a252097ab0a038a68d190 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: coordinate_structure_constraint_object_extraction +include: _template_yaml +task: blimp_coordinate_structure_constraint_object_extraction diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c27935e834d8ee21001dc897714c9c6e3b4a390 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: determiner_noun_agreement_1 +include: _template_yaml +task: blimp_determiner_noun_agreement_1 diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8c715a7b95de1b1f9b03afdb1001ba9b4e94442 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: determiner_noun_agreement_2 +include: _template_yaml +task: blimp_determiner_noun_agreement_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c2ab1b6af5c72f76d0826b9725ea651426fc830 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: determiner_noun_agreement_irregular_1 +include: _template_yaml +task: blimp_determiner_noun_agreement_irregular_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69c77d12e0174676cbdc1c009d1612ffde8e3d42 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: determiner_noun_agreement_irregular_2 +include: _template_yaml +task: blimp_determiner_noun_agreement_irregular_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..eb8dba60ef1b9aa3a5af3652b86637fe10577116 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: determiner_noun_agreement_with_adj_2 +include: _template_yaml +task: blimp_determiner_noun_agreement_with_adj_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57f12ecade63b595378cb2c9aadf710725e9d4b0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: determiner_noun_agreement_with_adj_irregular_1 +include: _template_yaml +task: blimp_determiner_noun_agreement_with_adj_irregular_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6df0e7d52df67c979fb74a440a113addb0c434bf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: determiner_noun_agreement_with_adj_irregular_2 +include: _template_yaml +task: blimp_determiner_noun_agreement_with_adj_irregular_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4512e9176f98a9f2ec3f53de15657b97274809fb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: determiner_noun_agreement_with_adjective_1 +include: _template_yaml +task: blimp_determiner_noun_agreement_with_adjective_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16e3c0217ee09d554edbe8210ff6c78375d267a4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: distractor_agreement_relational_noun +include: _template_yaml +task: blimp_distractor_agreement_relational_noun diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fbc28c51d663932ae558087f28a0333131148bd --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: distractor_agreement_relative_clause +include: _template_yaml +task: blimp_distractor_agreement_relative_clause diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db3b1fed109c802774c1ac8e347a931febc89646 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml @@ -0,0 +1,4 @@ +# Generated by 
utils.py +dataset_name: drop_argument +include: _template_yaml +task: blimp_drop_argument diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3686534f3edf83df2c470a7907678db8ebe85abc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: ellipsis_n_bar_1 +include: _template_yaml +task: blimp_ellipsis_n_bar_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bac472bdff2f61df39eb2fec55a98c44ca86b702 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: ellipsis_n_bar_2 +include: _template_yaml +task: blimp_ellipsis_n_bar_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml new file mode 100644 index 0000000000000000000000000000000000000000..765596462dce91f51b557fca254deef3a2ee325e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: existential_there_object_raising +include: _template_yaml +task: blimp_existential_there_object_raising diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15396ae3acadcada2e12549deeacd66b856d5a69 --- 
/dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: existential_there_quantifiers_1 +include: _template_yaml +task: blimp_existential_there_quantifiers_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81370693b6be13ce5b187f0954ae45aa7156d9d7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: existential_there_quantifiers_2 +include: _template_yaml +task: blimp_existential_there_quantifiers_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45e18aebb660ed759099230686c0e1ae24ea3f86 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: existential_there_subject_raising +include: _template_yaml +task: blimp_existential_there_subject_raising diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ee8d01875cec8b19ae74124fad0e1103c87e480 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: expletive_it_object_raising +include: _template_yaml +task: blimp_expletive_it_object_raising diff 
"""Generate one ``<subtask>.yaml`` task config per BLiMP subtask.

Every generated file includes ``_template_yaml`` and sets ``task`` to
``blimp_<subtask>`` and ``dataset_name`` to ``<subtask>``.
"""
import yaml


# BLiMP sub-dataset names; each one becomes the task ``blimp_<name>``.
all_subtasks = [
    "adjunct_island",
    "anaphor_gender_agreement",
    "anaphor_number_agreement",
    "animate_subject_passive",
    "animate_subject_trans",
    "causative",
    "complex_NP_island",
    "coordinate_structure_constraint_complex_left_branch",
    "coordinate_structure_constraint_object_extraction",
    "determiner_noun_agreement_1",
    "determiner_noun_agreement_2",
    "determiner_noun_agreement_irregular_1",
    "determiner_noun_agreement_irregular_2",
    "determiner_noun_agreement_with_adj_2",
    "determiner_noun_agreement_with_adj_irregular_1",
    "determiner_noun_agreement_with_adj_irregular_2",
    "determiner_noun_agreement_with_adjective_1",
    "distractor_agreement_relational_noun",
    "distractor_agreement_relative_clause",
    "drop_argument",
    "ellipsis_n_bar_1",
    "ellipsis_n_bar_2",
    "existential_there_object_raising",
    "existential_there_quantifiers_1",
    "existential_there_quantifiers_2",
    "existential_there_subject_raising",
    "expletive_it_object_raising",
    "inchoative",
    "intransitive",
    "irregular_past_participle_adjectives",
    "irregular_past_participle_verbs",
    "irregular_plural_subject_verb_agreement_1",
    "irregular_plural_subject_verb_agreement_2",
    "left_branch_island_echo_question",
    "left_branch_island_simple_question",
    "matrix_question_npi_licensor_present",
    "npi_present_1",
    "npi_present_2",
    "only_npi_licensor_present",
    "only_npi_scope",
    "passive_1",
    "passive_2",
    "principle_A_c_command",
    "principle_A_case_1",
    "principle_A_case_2",
    "principle_A_domain_1",
    "principle_A_domain_2",
    "principle_A_domain_3",
    "principle_A_reconstruction",
    "regular_plural_subject_verb_agreement_1",
    "regular_plural_subject_verb_agreement_2",
    "sentential_negation_npi_licensor_present",
    "sentential_negation_npi_scope",
    "sentential_subject_island",
    "superlative_quantifiers_1",
    "superlative_quantifiers_2",
    "tough_vs_raising_1",
    "tough_vs_raising_2",
    "transitive",
    "wh_island",
    "wh_questions_object_gap",
    "wh_questions_subject_gap",
    "wh_questions_subject_gap_long_distance",
    "wh_vs_that_no_gap",
    "wh_vs_that_no_gap_long_distance",
    "wh_vs_that_with_gap",
    "wh_vs_that_with_gap_long_distance",
]


def main(overwrite: bool = True) -> None:
    """Write ``<subtask>.yaml`` for every entry in ``all_subtasks``.

    Files are created relative to the current working directory.

    Args:
        overwrite: When True (the default, and the behaviour this script has
            always had) an existing file is truncated and rewritten. When
            False the file is opened in exclusive-create mode, so existing
            files are left untouched and skipped.
    """
    # Mode "w" never raises FileExistsError, so the handler below is only
    # reachable in exclusive-create mode ("x").
    mode = "w" if overwrite else "x"
    for task in all_subtasks:
        file_name = f"{task}.yaml"
        try:
            with open(file_name, mode, encoding="utf-8") as f:
                # NOTE(review): the header names utils.py although this script
                # is generate_configs.py; kept byte-identical so regenerated
                # files match the ones already committed.
                f.write("# Generated by utils.py\n")
                # yaml.dump sorts keys by default, so the file lists
                # dataset_name, include, task in that order.
                yaml.dump(
                    {
                        "include": "_template_yaml",
                        "task": "blimp_" + task,
                        "dataset_name": task,
                    },
                    f,
                )
        except FileExistsError:
            # Only raised when overwrite is False: keep the existing file.
            continue


if __name__ == "__main__":
    main()
_template_yaml +task: blimp_irregular_past_participle_adjectives diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..906fb347710e46c3159aaee05def45730b30929f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: irregular_past_participle_verbs +include: _template_yaml +task: blimp_irregular_past_participle_verbs diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..537c7764f671636cfb781382397f525d0fba305a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: irregular_plural_subject_verb_agreement_1 +include: _template_yaml +task: blimp_irregular_plural_subject_verb_agreement_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d3b84fceab0e3907ab6b1bd3e44a0e6c9445416 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: irregular_plural_subject_verb_agreement_2 +include: _template_yaml +task: blimp_irregular_plural_subject_verb_agreement_2 diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml new file mode 100644 index 0000000000000000000000000000000000000000..409e8ccca8a101366a0f881e775a7dcf9ff317b6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: left_branch_island_echo_question +include: _template_yaml +task: blimp_left_branch_island_echo_question diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml new file mode 100644 index 0000000000000000000000000000000000000000..214de3c2edb49de48878e6baed1bf725c9728b98 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: left_branch_island_simple_question +include: _template_yaml +task: blimp_left_branch_island_simple_question diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml new file mode 100644 index 0000000000000000000000000000000000000000..712cf4313ee90bc407b86d51c49fcaa3198247f8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: matrix_question_npi_licensor_present +include: _template_yaml +task: blimp_matrix_question_npi_licensor_present diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4031b4cf5f691d24486a144455a06c9f84ca2b86 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: npi_present_1 +include: _template_yaml +task: blimp_npi_present_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b401a9fce3deefd32f83315f55993739e9c26b3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: npi_present_2 +include: _template_yaml +task: blimp_npi_present_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_licensor_present.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_licensor_present.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dbce62337d39d44aed2f0f14cfd51dec367a42c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_licensor_present.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: only_npi_licensor_present +include: _template_yaml +task: blimp_only_npi_licensor_present diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_scope.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_scope.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4386575f591b9f03cf12f37e04ee8632c4fbec79 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_scope.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: only_npi_scope +include: _template_yaml +task: blimp_only_npi_scope diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..0dd6aca0535d448d9269ae1959063d687955a17f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: passive_1 +include: _template_yaml +task: blimp_passive_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_c_command.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_c_command.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9dfa123588d518f68748cf102dbd72941296059 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_c_command.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: principle_A_c_command +include: _template_yaml +task: blimp_principle_A_c_command diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..552f8a1e2423a6a4b7c1ea6a57b10f15fdbdbd1d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: principle_A_case_1 +include: _template_yaml +task: blimp_principle_A_case_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85aa920a268d5dbc4d7c69df746d4b70e334d206 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: principle_A_case_2 +include: _template_yaml +task: blimp_principle_A_case_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_1.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb06e731c5836934df3cbf8f77b1a768e248271d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: principle_A_domain_1 +include: _template_yaml +task: blimp_principle_A_domain_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec3be9a64d0bb5a408a905ed1b72c0b3eaf603c9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: principle_A_domain_2 +include: _template_yaml +task: blimp_principle_A_domain_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_3.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6ff32b71e82396c1ce36632503bd5f12e84d1b8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_3.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: principle_A_domain_3 +include: _template_yaml +task: blimp_principle_A_domain_3 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_reconstruction.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_reconstruction.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e2cdadc34fc0c7c3e14c8ab24ce0d522f7835d0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_reconstruction.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: principle_A_reconstruction +include: _template_yaml +task: 
blimp_principle_A_reconstruction diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d4df1f7216513f772006c5742917f692e827d59 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: regular_plural_subject_verb_agreement_1 +include: _template_yaml +task: blimp_regular_plural_subject_verb_agreement_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37cdb781391d0280c96458b6cf8493d65ca00d3c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: regular_plural_subject_verb_agreement_2 +include: _template_yaml +task: blimp_regular_plural_subject_verb_agreement_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df607e5c79e02ef8b284ce2b458ba5371951fc89 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sentential_negation_npi_licensor_present +include: _template_yaml +task: blimp_sentential_negation_npi_licensor_present diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml new file mode 100644 index 0000000000000000000000000000000000000000..854d9e5d86e393abbbca986cfebbd6156465f1eb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sentential_negation_npi_scope +include: _template_yaml +task: blimp_sentential_negation_npi_scope diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_subject_island.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_subject_island.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e26341a80a3ffb03e16aa0dc3c10471a4ca4ae3e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_subject_island.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sentential_subject_island +include: _template_yaml +task: blimp_sentential_subject_island diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3cf8bfc238feb272c290621c9d55772cb6f5dc4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: superlative_quantifiers_1 +include: _template_yaml +task: blimp_superlative_quantifiers_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7abc4dc28ddb4074bcb2db2f8d706119b1ca08d3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_1.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py 
+dataset_name: tough_vs_raising_1 +include: _template_yaml +task: blimp_tough_vs_raising_1 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18864352a9b1bfdb26c146af8333f9c0dfc4beec --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: transitive +include: _template_yaml +task: blimp_transitive diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b665096a09297695eb40f791faeb81b7d9b7f56 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wh_island +include: _template_yaml +task: blimp_wh_island diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_object_gap.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_object_gap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb78e7b917573f4c8be60508f454a9ddd6e2b668 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_object_gap.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wh_questions_object_gap +include: _template_yaml +task: blimp_wh_questions_object_gap diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b956919c455893a0282a7d3842fc57eefe624114 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap.yaml @@ -0,0 +1,4 @@ +# 
Generated by utils.py +dataset_name: wh_questions_subject_gap +include: _template_yaml +task: blimp_wh_questions_subject_gap diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap_long_distance.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap_long_distance.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34c3e5cf7f141db947d42b945262de6849700d3c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap_long_distance.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wh_questions_subject_gap_long_distance +include: _template_yaml +task: blimp_wh_questions_subject_gap_long_distance diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2221ce5fe0f55611003ab554d5f24aafad41bebf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wh_vs_that_no_gap +include: _template_yaml +task: blimp_wh_vs_that_no_gap diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca5af7a576a5ad6f15544cb748f857a549d90295 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wh_vs_that_with_gap +include: _template_yaml +task: blimp_wh_vs_that_with_gap diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..d38acc5ff3dc2acd9e207d563377ea4933669f40 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wh_vs_that_with_gap_long_distance +include: _template_yaml +task: blimp_wh_vs_that_with_gap_long_distance