koichi12 committed on
Commit 44dd60b · verified · 1 Parent(s): 222810a

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml +18 -0
  2. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-chinese.yaml +3 -0
  3. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-geography.yaml +3 -0
  4. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-mathqa.yaml +3 -0
  5. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml +3 -0
  6. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml +3 -0
  7. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml +3 -0
  8. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml +3 -0
  9. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml +3 -0
  10. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md +52 -0
  11. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml +75 -0
  12. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml +13 -0
  13. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/adjunct_island.yaml +4 -0
  14. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_gender_agreement.yaml +4 -0
  15. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_number_agreement.yaml +4 -0
  16. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml +4 -0
  17. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml +4 -0
  18. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml +4 -0
  19. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml +4 -0
  20. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml +4 -0
  21. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml +4 -0
  22. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml +4 -0
  23. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml +4 -0
  24. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml +4 -0
  25. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml +4 -0
  26. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml +4 -0
  27. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml +4 -0
  28. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml +4 -0
  29. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml +4 -0
  30. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml +4 -0
  31. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml +4 -0
  32. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml +4 -0
  33. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml +4 -0
  34. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml +4 -0
  35. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml +4 -0
  36. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml +4 -0
  37. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml +4 -0
  38. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml +4 -0
  39. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml +4 -0
  40. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/generate_configs.py +94 -0
  41. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml +4 -0
  42. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml +4 -0
  43. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml +4 -0
  44. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml +4 -0
  45. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml +4 -0
  46. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml +4 -0
  47. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml +4 -0
  48. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml +4 -0
  49. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml +4 -0
  50. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml +4 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml ADDED
@@ -0,0 +1,18 @@
+group: agieval_en
+task:
+  - agieval_aqua_rat
+  - agieval_gaokao_english # categorizing as EN because the AGIEval codebase lists this as in `english_qa_tasks`
+  - agieval_logiqa_en
+  - agieval_lsat_ar
+  - agieval_lsat_lr
+  - agieval_lsat_rc
+  - agieval_math
+  - agieval_sat_en_without_passage
+  - agieval_sat_en
+  - agieval_sat_math
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 0.0
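
Aggregation note: `agieval_en` averages subtask accuracy with `weight_by_size: true`, whereas the `_blimp.yaml` group further down uses `weight_by_size: False`. The difference, as I understand the option, is whether subtask scores are weighted by their example counts when averaged. A rough sketch of that arithmetic with hypothetical numbers (illustrative only, not the harness's own code):

```python
# Illustrative arithmetic only: hypothetical subtask accuracies and sizes.
subtask_acc = {"agieval_lsat_rc": 0.40, "agieval_sat_math": 0.30}   # accuracy per subtask
subtask_size = {"agieval_lsat_rc": 269, "agieval_sat_math": 220}    # examples per subtask

# weight_by_size: false -> plain mean over subtasks
unweighted = sum(subtask_acc.values()) / len(subtask_acc)

# weight_by_size: true -> mean weighted by how many examples each subtask has
weighted = sum(subtask_acc[t] * subtask_size[t] for t in subtask_acc) / sum(subtask_size.values())

print(f"unweighted={unweighted:.3f}  size-weighted={weighted:.3f}")
```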
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-chinese.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_gaokao_chinese
+dataset_path: hails/agieval-gaokao-chinese
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-geography.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_gaokao_geography
+dataset_path: hails/agieval-gaokao-geography
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-mathqa.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_gaokao_mathqa
+dataset_path: hails/agieval-gaokao-mathqa
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_gaokao_physics
+dataset_path: hails/agieval-gaokao-physics
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_logiqa_en
+dataset_path: hails/agieval-logiqa-en
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_logiqa_zh
+dataset_path: hails/agieval-logiqa-zh
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_lsat_rc
+dataset_path: hails/agieval-lsat-rc
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_sat_en
+dataset_path: hails/agieval-sat-en
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md ADDED
@@ -0,0 +1,52 @@
+# Task-name
+
+### Paper
+
+Title: `BLiMP: A Benchmark of Linguistic Minimal Pairs for English`
+Abstract: `https://arxiv.org/abs/1912.00582`
+
+BLiMP is a challenge set for evaluating what language models (LMs) know about
+major grammatical phenomena in English. BLiMP consists of 67 sub-datasets, each
+containing 1000 minimal pairs isolating specific contrasts in syntax, morphology,
+or semantics. The data is automatically generated according to expert-crafted
+grammars.
+
+Homepage: https://github.com/alexwarstadt/blimp
+
+
+### Citation
+
+```
+@article{warstadt2019blimp,
+    author = {Warstadt, Alex and Parrish, Alicia and Liu, Haokun and Mohananey, Anhad and Peng, Wei and Wang, Sheng-Fu and Bowman, Samuel R.},
+    title = {BLiMP: The Benchmark of Linguistic Minimal Pairs for English},
+    journal = {Transactions of the Association for Computational Linguistics},
+    volume = {8},
+    number = {},
+    pages = {377-392},
+    year = {2020},
+    doi = {10.1162/tacl\_a\_00321},
+    URL = {https://doi.org/10.1162/tacl_a_00321},
+    eprint = {https://doi.org/10.1162/tacl_a_00321},
+    abstract = { We introduce The Benchmark of Linguistic Minimal Pairs (BLiMP),1 a challenge set for evaluating the linguistic knowledge of language models (LMs) on major grammatical phenomena in English. BLiMP consists of 67 individual datasets, each containing 1,000 minimal pairs—that is, pairs of minimally different sentences that contrast in grammatical acceptability and isolate specific phenomenon in syntax, morphology, or semantics. We generate the data according to linguist-crafted grammar templates, and human aggregate agreement with the labels is 96.4\%. We evaluate n-gram, LSTM, and Transformer (GPT-2 and Transformer-XL) LMs by observing whether they assign a higher probability to the acceptable sentence in each minimal pair. We find that state-of-the-art models identify morphological contrasts related to agreement reliably, but they struggle with some subtle semantic and syntactic phenomena, such as negative polarity items and extraction islands. }
+}
+```
+
+### Subtasks
+
+List or describe tasks defined in this folder, and their names here:
+* `task_name`: `1-sentence description of what this particular task does`
+* `task_name2`: .....
+
+### Checklist
+
+For adding novel benchmarks/datasets to the library:
+* [ ] Is the task an existing benchmark in the literature?
+  * [ ] Have you referenced the original paper that introduced the task?
+  * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
+
+
+If other tasks on this dataset are already supported:
+* [ ] Is the "Main" variant of this task clearly denoted?
+* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
+* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
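
The README's evaluation recipe is a pairwise comparison: a model is credited on a minimal pair when it assigns higher probability to the acceptable sentence. A minimal sketch of that scoring, purely illustrative (the `logprob` callable stands in for whatever sentence scorer is used; this is not the harness's implementation):

```python
# Illustrative sketch, not the lm-evaluation-harness code: BLiMP accuracy is the
# fraction of minimal pairs where the good sentence scores higher than the bad one.
def blimp_accuracy(pairs, logprob):
    """pairs: list of (sentence_good, sentence_bad); logprob: callable sentence -> float."""
    correct = sum(logprob(good) > logprob(bad) for good, bad in pairs)
    return correct / len(pairs)
```

The `_template_yaml` added below encodes the same comparison as a two-way `multiple_choice` task: `doc_to_choice` yields `[sentence_good, sentence_bad]` and `doc_to_target: 0` marks the good sentence as the answer.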
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml ADDED
@@ -0,0 +1,75 @@
+group: blimp
+task:
+  - "blimp_adjunct_island"
+  - "blimp_anaphor_gender_agreement"
+  - "blimp_anaphor_number_agreement"
+  - "blimp_animate_subject_passive"
+  - "blimp_animate_subject_trans"
+  - "blimp_causative"
+  - "blimp_complex_NP_island"
+  - "blimp_coordinate_structure_constraint_complex_left_branch"
+  - "blimp_coordinate_structure_constraint_object_extraction"
+  - "blimp_determiner_noun_agreement_1"
+  - "blimp_determiner_noun_agreement_2"
+  - "blimp_determiner_noun_agreement_irregular_1"
+  - "blimp_determiner_noun_agreement_irregular_2"
+  - "blimp_determiner_noun_agreement_with_adj_2"
+  - "blimp_determiner_noun_agreement_with_adj_irregular_1"
+  - "blimp_determiner_noun_agreement_with_adj_irregular_2"
+  - "blimp_determiner_noun_agreement_with_adjective_1"
+  - "blimp_distractor_agreement_relational_noun"
+  - "blimp_distractor_agreement_relative_clause"
+  - "blimp_drop_argument"
+  - "blimp_ellipsis_n_bar_1"
+  - "blimp_ellipsis_n_bar_2"
+  - "blimp_existential_there_object_raising"
+  - "blimp_existential_there_quantifiers_1"
+  - "blimp_existential_there_quantifiers_2"
+  - "blimp_existential_there_subject_raising"
+  - "blimp_expletive_it_object_raising"
+  - "blimp_inchoative"
+  - "blimp_intransitive"
+  - "blimp_irregular_past_participle_adjectives"
+  - "blimp_irregular_past_participle_verbs"
+  - "blimp_irregular_plural_subject_verb_agreement_1"
+  - "blimp_irregular_plural_subject_verb_agreement_2"
+  - "blimp_left_branch_island_echo_question"
+  - "blimp_left_branch_island_simple_question"
+  - "blimp_matrix_question_npi_licensor_present"
+  - "blimp_npi_present_1"
+  - "blimp_npi_present_2"
+  - "blimp_only_npi_licensor_present"
+  - "blimp_only_npi_scope"
+  - "blimp_passive_1"
+  - "blimp_passive_2"
+  - "blimp_principle_A_c_command"
+  - "blimp_principle_A_case_1"
+  - "blimp_principle_A_case_2"
+  - "blimp_principle_A_domain_1"
+  - "blimp_principle_A_domain_2"
+  - "blimp_principle_A_domain_3"
+  - "blimp_principle_A_reconstruction"
+  - "blimp_regular_plural_subject_verb_agreement_1"
+  - "blimp_regular_plural_subject_verb_agreement_2"
+  - "blimp_sentential_negation_npi_licensor_present"
+  - "blimp_sentential_negation_npi_scope"
+  - "blimp_sentential_subject_island"
+  - "blimp_superlative_quantifiers_1"
+  - "blimp_superlative_quantifiers_2"
+  - "blimp_tough_vs_raising_1"
+  - "blimp_tough_vs_raising_2"
+  - "blimp_transitive"
+  - "blimp_wh_island"
+  - "blimp_wh_questions_object_gap"
+  - "blimp_wh_questions_subject_gap"
+  - "blimp_wh_questions_subject_gap_long_distance"
+  - "blimp_wh_vs_that_no_gap"
+  - "blimp_wh_vs_that_no_gap_long_distance"
+  - "blimp_wh_vs_that_with_gap"
+  - "blimp_wh_vs_that_with_gap_long_distance"
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: False
+metadata:
+  version: 2.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml ADDED
@@ -0,0 +1,13 @@
+dataset_path: blimp
+output_type: multiple_choice
+validation_split: train
+doc_to_text: ""
+doc_to_target: 0
+doc_to_choice: "{{[sentence_good, sentence_bad]}}"
+num_fewshot: 0
+should_decontaminate: true
+doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"
+metric_list:
+  - metric: acc
+metadata:
+  version: 1.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/adjunct_island.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: adjunct_island
+include: _template_yaml
+task: blimp_adjunct_island
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_gender_agreement.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: anaphor_gender_agreement
+include: _template_yaml
+task: blimp_anaphor_gender_agreement
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_number_agreement.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: anaphor_number_agreement
+include: _template_yaml
+task: blimp_anaphor_number_agreement
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: animate_subject_passive
+include: _template_yaml
+task: blimp_animate_subject_passive
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: animate_subject_trans
+include: _template_yaml
+task: blimp_animate_subject_trans
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: causative
+include: _template_yaml
+task: blimp_causative
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: complex_NP_island
+include: _template_yaml
+task: blimp_complex_NP_island
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: coordinate_structure_constraint_complex_left_branch
+include: _template_yaml
+task: blimp_coordinate_structure_constraint_complex_left_branch
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: coordinate_structure_constraint_object_extraction
+include: _template_yaml
+task: blimp_coordinate_structure_constraint_object_extraction
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_irregular_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_irregular_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_irregular_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_irregular_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_with_adj_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_with_adj_irregular_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_irregular_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_with_adj_irregular_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_irregular_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_with_adjective_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adjective_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: distractor_agreement_relational_noun
+include: _template_yaml
+task: blimp_distractor_agreement_relational_noun
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: distractor_agreement_relative_clause
+include: _template_yaml
+task: blimp_distractor_agreement_relative_clause
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: drop_argument
+include: _template_yaml
+task: blimp_drop_argument
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ellipsis_n_bar_1
+include: _template_yaml
+task: blimp_ellipsis_n_bar_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ellipsis_n_bar_2
+include: _template_yaml
+task: blimp_ellipsis_n_bar_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: existential_there_object_raising
+include: _template_yaml
+task: blimp_existential_there_object_raising
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: existential_there_quantifiers_1
+include: _template_yaml
+task: blimp_existential_there_quantifiers_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: existential_there_quantifiers_2
+include: _template_yaml
+task: blimp_existential_there_quantifiers_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: existential_there_subject_raising
+include: _template_yaml
+task: blimp_existential_there_subject_raising
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: expletive_it_object_raising
+include: _template_yaml
+task: blimp_expletive_it_object_raising
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/generate_configs.py ADDED
@@ -0,0 +1,94 @@
+import yaml
+
+
+all_subtasks = [
+    "adjunct_island",
+    "anaphor_gender_agreement",
+    "anaphor_number_agreement",
+    "animate_subject_passive",
+    "animate_subject_trans",
+    "causative",
+    "complex_NP_island",
+    "coordinate_structure_constraint_complex_left_branch",
+    "coordinate_structure_constraint_object_extraction",
+    "determiner_noun_agreement_1",
+    "determiner_noun_agreement_2",
+    "determiner_noun_agreement_irregular_1",
+    "determiner_noun_agreement_irregular_2",
+    "determiner_noun_agreement_with_adj_2",
+    "determiner_noun_agreement_with_adj_irregular_1",
+    "determiner_noun_agreement_with_adj_irregular_2",
+    "determiner_noun_agreement_with_adjective_1",
+    "distractor_agreement_relational_noun",
+    "distractor_agreement_relative_clause",
+    "drop_argument",
+    "ellipsis_n_bar_1",
+    "ellipsis_n_bar_2",
+    "existential_there_object_raising",
+    "existential_there_quantifiers_1",
+    "existential_there_quantifiers_2",
+    "existential_there_subject_raising",
+    "expletive_it_object_raising",
+    "inchoative",
+    "intransitive",
+    "irregular_past_participle_adjectives",
+    "irregular_past_participle_verbs",
+    "irregular_plural_subject_verb_agreement_1",
+    "irregular_plural_subject_verb_agreement_2",
+    "left_branch_island_echo_question",
+    "left_branch_island_simple_question",
+    "matrix_question_npi_licensor_present",
+    "npi_present_1",
+    "npi_present_2",
+    "only_npi_licensor_present",
+    "only_npi_scope",
+    "passive_1",
+    "passive_2",
+    "principle_A_c_command",
+    "principle_A_case_1",
+    "principle_A_case_2",
+    "principle_A_domain_1",
+    "principle_A_domain_2",
+    "principle_A_domain_3",
+    "principle_A_reconstruction",
+    "regular_plural_subject_verb_agreement_1",
+    "regular_plural_subject_verb_agreement_2",
+    "sentential_negation_npi_licensor_present",
+    "sentential_negation_npi_scope",
+    "sentential_subject_island",
+    "superlative_quantifiers_1",
+    "superlative_quantifiers_2",
+    "tough_vs_raising_1",
+    "tough_vs_raising_2",
+    "transitive",
+    "wh_island",
+    "wh_questions_object_gap",
+    "wh_questions_subject_gap",
+    "wh_questions_subject_gap_long_distance",
+    "wh_vs_that_no_gap",
+    "wh_vs_that_no_gap_long_distance",
+    "wh_vs_that_with_gap",
+    "wh_vs_that_with_gap_long_distance",
+]
+
+
+def main() -> None:
+    for task in all_subtasks:
+        file_name = f"{task}.yaml"
+        try:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(
+                    {
+                        "include": "_template_yaml",
+                        "task": "blimp_" + task,
+                        "dataset_name": task,
+                    },
+                    f,
+                )
+        except FileExistsError:
+            pass
+
+
+if __name__ == "__main__":
+    main()
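
The per-subtask YAMLs in this commit carry the `# Generated by utils.py` header that this script writes, so they appear to be its output. A hypothetical sanity check of that correspondence, assuming it is run from this `blimp/` task directory (illustrative only, not part of the commit):

```python
import yaml

# Load one checked-in per-subtask config and confirm it contains exactly
# the keys that generate_configs.py writes for that subtask.
with open("adjunct_island.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

assert cfg == {
    "include": "_template_yaml",
    "task": "blimp_adjunct_island",
    "dataset_name": "adjunct_island",
}
```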
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: intransitive
+include: _template_yaml
+task: blimp_intransitive
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_past_participle_adjectives
+include: _template_yaml
+task: blimp_irregular_past_participle_adjectives
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_past_participle_verbs
+include: _template_yaml
+task: blimp_irregular_past_participle_verbs
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_plural_subject_verb_agreement_1
+include: _template_yaml
+task: blimp_irregular_plural_subject_verb_agreement_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_plural_subject_verb_agreement_2
+include: _template_yaml
+task: blimp_irregular_plural_subject_verb_agreement_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: left_branch_island_echo_question
+include: _template_yaml
+task: blimp_left_branch_island_echo_question
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: left_branch_island_simple_question
+include: _template_yaml
+task: blimp_left_branch_island_simple_question
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: matrix_question_npi_licensor_present
+include: _template_yaml
+task: blimp_matrix_question_npi_licensor_present
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: npi_present_1
+include: _template_yaml
+task: blimp_npi_present_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: npi_present_2
+include: _template_yaml
+task: blimp_npi_present_2