koichi12 committed on
Commit 44dd60b · verified · 1 Parent(s): 222810a

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml +18 -0
  2. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-chinese.yaml +3 -0
  3. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-geography.yaml +3 -0
  4. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-mathqa.yaml +3 -0
  5. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml +3 -0
  6. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml +3 -0
  7. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml +3 -0
  8. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml +3 -0
  9. scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml +3 -0
  10. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md +52 -0
  11. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml +75 -0
  12. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml +13 -0
  13. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/adjunct_island.yaml +4 -0
  14. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_gender_agreement.yaml +4 -0
  15. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_number_agreement.yaml +4 -0
  16. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml +4 -0
  17. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml +4 -0
  18. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml +4 -0
  19. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml +4 -0
  20. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml +4 -0
  21. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml +4 -0
  22. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml +4 -0
  23. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml +4 -0
  24. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml +4 -0
  25. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml +4 -0
  26. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml +4 -0
  27. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml +4 -0
  28. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml +4 -0
  29. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml +4 -0
  30. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml +4 -0
  31. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml +4 -0
  32. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml +4 -0
  33. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml +4 -0
  34. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml +4 -0
  35. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml +4 -0
  36. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml +4 -0
  37. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml +4 -0
  38. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml +4 -0
  39. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml +4 -0
  40. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/generate_configs.py +94 -0
  41. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml +4 -0
  42. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml +4 -0
  43. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml +4 -0
  44. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml +4 -0
  45. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml +4 -0
  46. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml +4 -0
  47. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml +4 -0
  48. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml +4 -0
  49. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml +4 -0
  50. scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml +4 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml ADDED
@@ -0,0 +1,18 @@
+group: agieval_en
+task:
+  - agieval_aqua_rat
+  - agieval_gaokao_english # categorizing as EN because the AGIEval codebase lists this as in `english_qa_tasks`
+  - agieval_logiqa_en
+  - agieval_lsat_ar
+  - agieval_lsat_lr
+  - agieval_lsat_rc
+  - agieval_math
+  - agieval_sat_en_without_passage
+  - agieval_sat_en
+  - agieval_sat_math
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 0.0
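
Aggregation note: `agieval_en` averages subtask accuracy with `weight_by_size: true`, whereas the `_blimp.yaml` group further down uses `weight_by_size: False`. The difference, as I understand the option, is whether subtask scores are weighted by their example counts when averaged. A rough sketch of that arithmetic with hypothetical numbers (illustrative only, not the harness's own code):

```python
# Illustrative arithmetic only: hypothetical subtask accuracies and sizes.
subtask_acc = {"agieval_lsat_rc": 0.40, "agieval_sat_math": 0.30}   # accuracy per subtask
subtask_size = {"agieval_lsat_rc": 269, "agieval_sat_math": 220}    # examples per subtask

# weight_by_size: false -> plain mean over subtasks
unweighted = sum(subtask_acc.values()) / len(subtask_acc)

# weight_by_size: true -> mean weighted by how many examples each subtask has
weighted = sum(subtask_acc[t] * subtask_size[t] for t in subtask_acc) / sum(subtask_size.values())

print(f"unweighted={unweighted:.3f}  size-weighted={weighted:.3f}")
```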
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-chinese.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_gaokao_chinese
+dataset_path: hails/agieval-gaokao-chinese
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-geography.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_gaokao_geography
+dataset_path: hails/agieval-gaokao-geography
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-mathqa.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_gaokao_mathqa
+dataset_path: hails/agieval-gaokao-mathqa
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_gaokao_physics
+dataset_path: hails/agieval-gaokao-physics
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_logiqa_en
+dataset_path: hails/agieval-logiqa-en
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_logiqa_zh
+dataset_path: hails/agieval-logiqa-zh
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_lsat_rc
+dataset_path: hails/agieval-lsat-rc
scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml ADDED
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml
+task: agieval_sat_en
+dataset_path: hails/agieval-sat-en
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md ADDED
@@ -0,0 +1,52 @@
+# Task-name
+
+### Paper
+
+Title: `BLiMP: A Benchmark of Linguistic Minimal Pairs for English`
+Abstract: `https://arxiv.org/abs/1912.00582`
+
+BLiMP is a challenge set for evaluating what language models (LMs) know about
+major grammatical phenomena in English. BLiMP consists of 67 sub-datasets, each
+containing 1000 minimal pairs isolating specific contrasts in syntax, morphology,
+or semantics. The data is automatically generated according to expert-crafted
+grammars.
+
+Homepage: https://github.com/alexwarstadt/blimp
+
+
+### Citation
+
+```
+@article{warstadt2019blimp,
+    author = {Warstadt, Alex and Parrish, Alicia and Liu, Haokun and Mohananey, Anhad and Peng, Wei and Wang, Sheng-Fu and Bowman, Samuel R.},
+    title = {BLiMP: The Benchmark of Linguistic Minimal Pairs for English},
+    journal = {Transactions of the Association for Computational Linguistics},
+    volume = {8},
+    number = {},
+    pages = {377-392},
+    year = {2020},
+    doi = {10.1162/tacl\_a\_00321},
+    URL = {https://doi.org/10.1162/tacl_a_00321},
+    eprint = {https://doi.org/10.1162/tacl_a_00321},
+    abstract = { We introduce The Benchmark of Linguistic Minimal Pairs (BLiMP),1 a challenge set for evaluating the linguistic knowledge of language models (LMs) on major grammatical phenomena in English. BLiMP consists of 67 individual datasets, each containing 1,000 minimal pairs—that is, pairs of minimally different sentences that contrast in grammatical acceptability and isolate specific phenomenon in syntax, morphology, or semantics. We generate the data according to linguist-crafted grammar templates, and human aggregate agreement with the labels is 96.4\%. We evaluate n-gram, LSTM, and Transformer (GPT-2 and Transformer-XL) LMs by observing whether they assign a higher probability to the acceptable sentence in each minimal pair. We find that state-of-the-art models identify morphological contrasts related to agreement reliably, but they struggle with some subtle semantic and syntactic phenomena, such as negative polarity items and extraction islands. }
+}
+```
+
+### Subtasks
+
+List or describe tasks defined in this folder, and their names here:
+* `task_name`: `1-sentence description of what this particular task does`
+* `task_name2`: .....
+
+### Checklist
+
+For adding novel benchmarks/datasets to the library:
+* [ ] Is the task an existing benchmark in the literature?
+  * [ ] Have you referenced the original paper that introduced the task?
+  * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
+
+
+If other tasks on this dataset are already supported:
+* [ ] Is the "Main" variant of this task clearly denoted?
+* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
+* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
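
The README's evaluation recipe is a pairwise comparison: a model is credited on a minimal pair when it assigns higher probability to the acceptable sentence. A minimal sketch of that scoring, purely illustrative (the `logprob` callable stands in for whatever sentence scorer is used; this is not the harness's implementation):

```python
# Illustrative sketch, not the lm-evaluation-harness code: BLiMP accuracy is the
# fraction of minimal pairs where the good sentence scores higher than the bad one.
def blimp_accuracy(pairs, logprob):
    """pairs: list of (sentence_good, sentence_bad); logprob: callable sentence -> float."""
    correct = sum(logprob(good) > logprob(bad) for good, bad in pairs)
    return correct / len(pairs)
```

The `_template_yaml` added below encodes the same comparison as a two-way `multiple_choice` task: `doc_to_choice` yields `[sentence_good, sentence_bad]` and `doc_to_target: 0` marks the good sentence as the answer.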
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml ADDED
@@ -0,0 +1,75 @@
+group: blimp
+task:
+  - "blimp_adjunct_island"
+  - "blimp_anaphor_gender_agreement"
+  - "blimp_anaphor_number_agreement"
+  - "blimp_animate_subject_passive"
+  - "blimp_animate_subject_trans"
+  - "blimp_causative"
+  - "blimp_complex_NP_island"
+  - "blimp_coordinate_structure_constraint_complex_left_branch"
+  - "blimp_coordinate_structure_constraint_object_extraction"
+  - "blimp_determiner_noun_agreement_1"
+  - "blimp_determiner_noun_agreement_2"
+  - "blimp_determiner_noun_agreement_irregular_1"
+  - "blimp_determiner_noun_agreement_irregular_2"
+  - "blimp_determiner_noun_agreement_with_adj_2"
+  - "blimp_determiner_noun_agreement_with_adj_irregular_1"
+  - "blimp_determiner_noun_agreement_with_adj_irregular_2"
+  - "blimp_determiner_noun_agreement_with_adjective_1"
+  - "blimp_distractor_agreement_relational_noun"
+  - "blimp_distractor_agreement_relative_clause"
+  - "blimp_drop_argument"
+  - "blimp_ellipsis_n_bar_1"
+  - "blimp_ellipsis_n_bar_2"
+  - "blimp_existential_there_object_raising"
+  - "blimp_existential_there_quantifiers_1"
+  - "blimp_existential_there_quantifiers_2"
+  - "blimp_existential_there_subject_raising"
+  - "blimp_expletive_it_object_raising"
+  - "blimp_inchoative"
+  - "blimp_intransitive"
+  - "blimp_irregular_past_participle_adjectives"
+  - "blimp_irregular_past_participle_verbs"
+  - "blimp_irregular_plural_subject_verb_agreement_1"
+  - "blimp_irregular_plural_subject_verb_agreement_2"
+  - "blimp_left_branch_island_echo_question"
+  - "blimp_left_branch_island_simple_question"
+  - "blimp_matrix_question_npi_licensor_present"
+  - "blimp_npi_present_1"
+  - "blimp_npi_present_2"
+  - "blimp_only_npi_licensor_present"
+  - "blimp_only_npi_scope"
+  - "blimp_passive_1"
+  - "blimp_passive_2"
+  - "blimp_principle_A_c_command"
+  - "blimp_principle_A_case_1"
+  - "blimp_principle_A_case_2"
+  - "blimp_principle_A_domain_1"
+  - "blimp_principle_A_domain_2"
+  - "blimp_principle_A_domain_3"
+  - "blimp_principle_A_reconstruction"
+  - "blimp_regular_plural_subject_verb_agreement_1"
+  - "blimp_regular_plural_subject_verb_agreement_2"
+  - "blimp_sentential_negation_npi_licensor_present"
+  - "blimp_sentential_negation_npi_scope"
+  - "blimp_sentential_subject_island"
+  - "blimp_superlative_quantifiers_1"
+  - "blimp_superlative_quantifiers_2"
+  - "blimp_tough_vs_raising_1"
+  - "blimp_tough_vs_raising_2"
+  - "blimp_transitive"
+  - "blimp_wh_island"
+  - "blimp_wh_questions_object_gap"
+  - "blimp_wh_questions_subject_gap"
+  - "blimp_wh_questions_subject_gap_long_distance"
+  - "blimp_wh_vs_that_no_gap"
+  - "blimp_wh_vs_that_no_gap_long_distance"
+  - "blimp_wh_vs_that_with_gap"
+  - "blimp_wh_vs_that_with_gap_long_distance"
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: False
+metadata:
+  version: 2.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml ADDED
@@ -0,0 +1,13 @@
+dataset_path: blimp
+output_type: multiple_choice
+validation_split: train
+doc_to_text: ""
+doc_to_target: 0
+doc_to_choice: "{{[sentence_good, sentence_bad]}}"
+num_fewshot: 0
+should_decontaminate: true
+doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"
+metric_list:
+  - metric: acc
+metadata:
+  version: 1.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/adjunct_island.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: adjunct_island
+include: _template_yaml
+task: blimp_adjunct_island
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_gender_agreement.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: anaphor_gender_agreement
+include: _template_yaml
+task: blimp_anaphor_gender_agreement
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_number_agreement.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: anaphor_number_agreement
+include: _template_yaml
+task: blimp_anaphor_number_agreement
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: animate_subject_passive
+include: _template_yaml
+task: blimp_animate_subject_passive
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: animate_subject_trans
+include: _template_yaml
+task: blimp_animate_subject_trans
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: causative
+include: _template_yaml
+task: blimp_causative
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: complex_NP_island
+include: _template_yaml
+task: blimp_complex_NP_island
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: coordinate_structure_constraint_complex_left_branch
+include: _template_yaml
+task: blimp_coordinate_structure_constraint_complex_left_branch
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: coordinate_structure_constraint_object_extraction
+include: _template_yaml
+task: blimp_coordinate_structure_constraint_object_extraction
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_irregular_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_irregular_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_irregular_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_irregular_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_with_adj_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_with_adj_irregular_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_irregular_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_with_adj_irregular_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_irregular_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: determiner_noun_agreement_with_adjective_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adjective_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: distractor_agreement_relational_noun
+include: _template_yaml
+task: blimp_distractor_agreement_relational_noun
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: distractor_agreement_relative_clause
+include: _template_yaml
+task: blimp_distractor_agreement_relative_clause
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: drop_argument
+include: _template_yaml
+task: blimp_drop_argument
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ellipsis_n_bar_1
+include: _template_yaml
+task: blimp_ellipsis_n_bar_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ellipsis_n_bar_2
+include: _template_yaml
+task: blimp_ellipsis_n_bar_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: existential_there_object_raising
+include: _template_yaml
+task: blimp_existential_there_object_raising
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: existential_there_quantifiers_1
+include: _template_yaml
+task: blimp_existential_there_quantifiers_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: existential_there_quantifiers_2
+include: _template_yaml
+task: blimp_existential_there_quantifiers_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: existential_there_subject_raising
+include: _template_yaml
+task: blimp_existential_there_subject_raising
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: expletive_it_object_raising
+include: _template_yaml
+task: blimp_expletive_it_object_raising
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/generate_configs.py ADDED
@@ -0,0 +1,94 @@
+import yaml
+
+
+all_subtasks = [
+    "adjunct_island",
+    "anaphor_gender_agreement",
+    "anaphor_number_agreement",
+    "animate_subject_passive",
+    "animate_subject_trans",
+    "causative",
+    "complex_NP_island",
+    "coordinate_structure_constraint_complex_left_branch",
+    "coordinate_structure_constraint_object_extraction",
+    "determiner_noun_agreement_1",
+    "determiner_noun_agreement_2",
+    "determiner_noun_agreement_irregular_1",
+    "determiner_noun_agreement_irregular_2",
+    "determiner_noun_agreement_with_adj_2",
+    "determiner_noun_agreement_with_adj_irregular_1",
+    "determiner_noun_agreement_with_adj_irregular_2",
+    "determiner_noun_agreement_with_adjective_1",
+    "distractor_agreement_relational_noun",
+    "distractor_agreement_relative_clause",
+    "drop_argument",
+    "ellipsis_n_bar_1",
+    "ellipsis_n_bar_2",
+    "existential_there_object_raising",
+    "existential_there_quantifiers_1",
+    "existential_there_quantifiers_2",
+    "existential_there_subject_raising",
+    "expletive_it_object_raising",
+    "inchoative",
+    "intransitive",
+    "irregular_past_participle_adjectives",
+    "irregular_past_participle_verbs",
+    "irregular_plural_subject_verb_agreement_1",
+    "irregular_plural_subject_verb_agreement_2",
+    "left_branch_island_echo_question",
+    "left_branch_island_simple_question",
+    "matrix_question_npi_licensor_present",
+    "npi_present_1",
+    "npi_present_2",
+    "only_npi_licensor_present",
+    "only_npi_scope",
+    "passive_1",
+    "passive_2",
+    "principle_A_c_command",
+    "principle_A_case_1",
+    "principle_A_case_2",
+    "principle_A_domain_1",
+    "principle_A_domain_2",
+    "principle_A_domain_3",
+    "principle_A_reconstruction",
+    "regular_plural_subject_verb_agreement_1",
+    "regular_plural_subject_verb_agreement_2",
+    "sentential_negation_npi_licensor_present",
+    "sentential_negation_npi_scope",
+    "sentential_subject_island",
+    "superlative_quantifiers_1",
+    "superlative_quantifiers_2",
+    "tough_vs_raising_1",
+    "tough_vs_raising_2",
+    "transitive",
+    "wh_island",
+    "wh_questions_object_gap",
+    "wh_questions_subject_gap",
+    "wh_questions_subject_gap_long_distance",
+    "wh_vs_that_no_gap",
+    "wh_vs_that_no_gap_long_distance",
+    "wh_vs_that_with_gap",
+    "wh_vs_that_with_gap_long_distance",
+]
+
+
+def main() -> None:
+    for task in all_subtasks:
+        file_name = f"{task}.yaml"
+        try:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(
+                    {
+                        "include": "_template_yaml",
+                        "task": "blimp_" + task,
+                        "dataset_name": task,
+                    },
+                    f,
+                )
+        except FileExistsError:
+            pass
+
+
+if __name__ == "__main__":
+    main()
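
The per-subtask YAMLs in this commit carry the `# Generated by utils.py` header that this script writes, so they appear to be its output. A hypothetical sanity check of that correspondence, assuming it is run from this `blimp/` task directory (illustrative only, not part of the commit):

```python
import yaml

# Load one checked-in per-subtask config and confirm it contains exactly
# the keys that generate_configs.py writes for that subtask.
with open("adjunct_island.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

assert cfg == {
    "include": "_template_yaml",
    "task": "blimp_adjunct_island",
    "dataset_name": "adjunct_island",
}
```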
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: intransitive
+include: _template_yaml
+task: blimp_intransitive
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_past_participle_adjectives
+include: _template_yaml
+task: blimp_irregular_past_participle_adjectives
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_past_participle_verbs
+include: _template_yaml
+task: blimp_irregular_past_participle_verbs
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_plural_subject_verb_agreement_1
+include: _template_yaml
+task: blimp_irregular_plural_subject_verb_agreement_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_plural_subject_verb_agreement_2
+include: _template_yaml
+task: blimp_irregular_plural_subject_verb_agreement_2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: left_branch_island_echo_question
+include: _template_yaml
+task: blimp_left_branch_island_echo_question
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: left_branch_island_simple_question
+include: _template_yaml
+task: blimp_left_branch_island_simple_question
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: matrix_question_npi_licensor_present
+include: _template_yaml
+task: blimp_matrix_question_npi_licensor_present
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: npi_present_1
+include: _template_yaml
+task: blimp_npi_present_1
scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml ADDED
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: npi_present_2
+include: _template_yaml
+task: blimp_npi_present_2