diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a873d66d3a4e98fc2ce2df26e53f20a599bc4e8
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml
@@ -0,0 +1,18 @@
+group: agieval_en  # group name for the English-language AGIEval subtasks listed below
+task:
+  - agieval_aqua_rat
+  - agieval_gaokao_english  # categorized as EN because the AGIEval codebase lists it in `english_qa_tasks`
+  - agieval_logiqa_en
+  - agieval_lsat_ar
+  - agieval_lsat_lr
+  - agieval_lsat_rc
+  - agieval_math
+  - agieval_sat_en_without_passage
+  - agieval_sat_en
+  - agieval_sat_math
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true  # presumably weights each subtask by its example count -- confirm against harness docs
+metadata:
+  version: 0.0
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-chinese.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-chinese.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3d58b5bc495917482ef69f04604b7f78f91339f5
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-chinese.yaml
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml  # base config; this file sets only the task name and dataset path
+task: agieval_gaokao_chinese
+dataset_path: hails/agieval-gaokao-chinese
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-geography.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-geography.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6dbce6f4873e272f9c28f49b0061857060df2e97
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-geography.yaml
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml  # base config; this file sets only the task name and dataset path
+task: agieval_gaokao_geography
+dataset_path: hails/agieval-gaokao-geography
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-mathqa.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-mathqa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e0d97a515559d9eaecca8bc73949a1d6886b1922
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-mathqa.yaml
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml  # base config; this file sets only the task name and dataset path
+task: agieval_gaokao_mathqa
+dataset_path: hails/agieval-gaokao-mathqa
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..43a047edafd06ab29c666741dd4f28560c64eff9
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/gaokao-physics.yaml
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml  # base config; this file sets only the task name and dataset path
+task: agieval_gaokao_physics
+dataset_path: hails/agieval-gaokao-physics
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5bd1dff40b0017ee23067cd20bc8543eaf8081b2
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml  # base config; this file sets only the task name and dataset path
+task: agieval_logiqa_en
+dataset_path: hails/agieval-logiqa-en
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ca9198b53240e04e69a617274f93964be067539
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml  # base config; this file sets only the task name and dataset path
+task: agieval_logiqa_zh
+dataset_path: hails/agieval-logiqa-zh
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23a9dce7d3853af35091d0bd32df1dbd481ab7aa
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml  # base config; this file sets only the task name and dataset path
+task: agieval_lsat_rc
+dataset_path: hails/agieval-lsat-rc
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..793d48aec2228daef7431f846e3166de0e12a602
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml
@@ -0,0 +1,3 @@
+include: aqua-rat.yaml  # base config; this file sets only the task name and dataset path
+task: agieval_sat_en
+dataset_path: hails/agieval-sat-en
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d3877a23866e75bd666b877c1225b956a226ba81
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/README.md
@@ -0,0 +1,52 @@
+# BLiMP
+
+### Paper
+
+Title: `BLiMP: The Benchmark of Linguistic Minimal Pairs for English`
+Abstract: `https://arxiv.org/abs/1912.00582`
+
+BLiMP is a challenge set for evaluating what language models (LMs) know about
+major grammatical phenomena in English. BLiMP consists of 67 sub-datasets, each
+containing 1000 minimal pairs isolating specific contrasts in syntax, morphology,
+or semantics. The data is automatically generated according to expert-crafted
+grammars.
+
+Homepage: https://github.com/alexwarstadt/blimp
+
+
+### Citation
+
+```
+@article{warstadt2019blimp,
+    author = {Warstadt, Alex and Parrish, Alicia and Liu, Haokun and Mohananey, Anhad and Peng, Wei and Wang, Sheng-Fu and Bowman, Samuel R.},
+    title = {BLiMP: The Benchmark of Linguistic Minimal Pairs for English},
+    journal = {Transactions of the Association for Computational Linguistics},
+    volume = {8},
+    number = {},
+    pages = {377-392},
+    year = {2020},
+    doi = {10.1162/tacl\_a\_00321},
+    URL = {https://doi.org/10.1162/tacl_a_00321},
+    eprint = {https://doi.org/10.1162/tacl_a_00321},
+    abstract = { We introduce The Benchmark of Linguistic Minimal Pairs (BLiMP), a challenge set for evaluating the linguistic knowledge of language models (LMs) on major grammatical phenomena in English. BLiMP consists of 67 individual datasets, each containing 1,000 minimal pairs—that is, pairs of minimally different sentences that contrast in grammatical acceptability and isolate specific phenomenon in syntax, morphology, or semantics. We generate the data according to linguist-crafted grammar templates, and human aggregate agreement with the labels is 96.4\%. We evaluate n-gram, LSTM, and Transformer (GPT-2 and Transformer-XL) LMs by observing whether they assign a higher probability to the acceptable sentence in each minimal pair. We find that state-of-the-art models identify morphological contrasts related to agreement reliably, but they struggle with some subtle semantic and syntactic phenomena, such as negative polarity items and extraction islands. }
+}
+```
+
+### Subtasks
+
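+The following group and tasks are defined in this folder:
+* `blimp`: group that runs all 67 subtasks and reports the mean of their `acc` scores (see `_blimp.yaml`).
+* `blimp_<subtask>`: one task per BLiMP sub-dataset (e.g. `blimp_adjunct_island`), each built on the shared `_template_yaml`.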
+
+### Checklist
+
+For adding novel benchmarks/datasets to the library:
+* [ ] Is the task an existing benchmark in the literature?
+  * [ ] Have you referenced the original paper that introduced the task?
+  * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
+
+
+If other tasks on this dataset are already supported:
+* [ ] Is the "Main" variant of this task clearly denoted?
+* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
+* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6393eeada381684c85a119b83c68d2c759787f44
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml
@@ -0,0 +1,75 @@
+group: blimp  # group covering the 67 BLiMP subtasks listed below
+task:
+  - "blimp_adjunct_island"
+  - "blimp_anaphor_gender_agreement"
+  - "blimp_anaphor_number_agreement"
+  - "blimp_animate_subject_passive"
+  - "blimp_animate_subject_trans"
+  - "blimp_causative"
+  - "blimp_complex_NP_island"
+  - "blimp_coordinate_structure_constraint_complex_left_branch"
+  - "blimp_coordinate_structure_constraint_object_extraction"
+  - "blimp_determiner_noun_agreement_1"
+  - "blimp_determiner_noun_agreement_2"
+  - "blimp_determiner_noun_agreement_irregular_1"
+  - "blimp_determiner_noun_agreement_irregular_2"
+  - "blimp_determiner_noun_agreement_with_adj_2"
+  - "blimp_determiner_noun_agreement_with_adj_irregular_1"
+  - "blimp_determiner_noun_agreement_with_adj_irregular_2"
+  - "blimp_determiner_noun_agreement_with_adjective_1"
+  - "blimp_distractor_agreement_relational_noun"
+  - "blimp_distractor_agreement_relative_clause"
+  - "blimp_drop_argument"
+  - "blimp_ellipsis_n_bar_1"
+  - "blimp_ellipsis_n_bar_2"
+  - "blimp_existential_there_object_raising"
+  - "blimp_existential_there_quantifiers_1"
+  - "blimp_existential_there_quantifiers_2"
+  - "blimp_existential_there_subject_raising"
+  - "blimp_expletive_it_object_raising"
+  - "blimp_inchoative"
+  - "blimp_intransitive"
+  - "blimp_irregular_past_participle_adjectives"
+  - "blimp_irregular_past_participle_verbs"
+  - "blimp_irregular_plural_subject_verb_agreement_1"
+  - "blimp_irregular_plural_subject_verb_agreement_2"
+  - "blimp_left_branch_island_echo_question"
+  - "blimp_left_branch_island_simple_question"
+  - "blimp_matrix_question_npi_licensor_present"
+  - "blimp_npi_present_1"
+  - "blimp_npi_present_2"
+  - "blimp_only_npi_licensor_present"
+  - "blimp_only_npi_scope"
+  - "blimp_passive_1"
+  - "blimp_passive_2"
+  - "blimp_principle_A_c_command"
+  - "blimp_principle_A_case_1"
+  - "blimp_principle_A_case_2"
+  - "blimp_principle_A_domain_1"
+  - "blimp_principle_A_domain_2"
+  - "blimp_principle_A_domain_3"
+  - "blimp_principle_A_reconstruction"
+  - "blimp_regular_plural_subject_verb_agreement_1"
+  - "blimp_regular_plural_subject_verb_agreement_2"
+  - "blimp_sentential_negation_npi_licensor_present"
+  - "blimp_sentential_negation_npi_scope"
+  - "blimp_sentential_subject_island"
+  - "blimp_superlative_quantifiers_1"
+  - "blimp_superlative_quantifiers_2"
+  - "blimp_tough_vs_raising_1"
+  - "blimp_tough_vs_raising_2"
+  - "blimp_transitive"
+  - "blimp_wh_island"
+  - "blimp_wh_questions_object_gap"
+  - "blimp_wh_questions_subject_gap"
+  - "blimp_wh_questions_subject_gap_long_distance"
+  - "blimp_wh_vs_that_no_gap"
+  - "blimp_wh_vs_that_no_gap_long_distance"
+  - "blimp_wh_vs_that_with_gap"
+  - "blimp_wh_vs_that_with_gap_long_distance"
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: false  # canonical lowercase boolean; same parsed value as before
+metadata:
+  version: 2.0
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f81e7938af5c6e0ddbc4605e2c39acb1a9f0b374
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml
@@ -0,0 +1,13 @@
+dataset_path: blimp
+output_type: multiple_choice
+validation_split: train  # the `train` split is what gets used as the validation split
+doc_to_text: ""  # empty prompt text
+doc_to_target: 0  # presumably an index into doc_to_choice, i.e. sentence_good -- confirm against harness docs
+doc_to_choice: "{{[sentence_good, sentence_bad]}}"
+num_fewshot: 0
+should_decontaminate: true
+doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"
+metric_list:
+  - metric: acc
+metadata:
+  version: 1.0
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/adjunct_island.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/adjunct_island.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abdb4b8c898e71eac1da1de57b4ff9b425a32644
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/adjunct_island.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: adjunct_island
+include: _template_yaml
+task: blimp_adjunct_island
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_gender_agreement.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_gender_agreement.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9117dafad3c43968010d4c595d0ffafcc377de44
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_gender_agreement.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: anaphor_gender_agreement
+include: _template_yaml
+task: blimp_anaphor_gender_agreement
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_number_agreement.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_number_agreement.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e63200c83f41a0f03bd4afba0795e8071952cebd
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/anaphor_number_agreement.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: anaphor_number_agreement
+include: _template_yaml
+task: blimp_anaphor_number_agreement
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..99118adb9f283a3dc9f5e26fa387915ed3a6a57c
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_passive.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: animate_subject_passive
+include: _template_yaml
+task: blimp_animate_subject_passive
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d15eb2c77d454ae8e2791cac85601a803f4bd785
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/animate_subject_trans.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: animate_subject_trans
+include: _template_yaml
+task: blimp_animate_subject_trans
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5b82ef3914b5dd34d1417964dacb0bd2f038b190
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: causative
+include: _template_yaml
+task: blimp_causative
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f4ccfe41fa0e6e5d3b8d5b46d6f2edaac60606f9
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/complex_NP_island.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: complex_NP_island
+include: _template_yaml
+task: blimp_complex_NP_island
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1acc7d544a1fcf6756264d1ac236c839128ff449
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: coordinate_structure_constraint_complex_left_branch
+include: _template_yaml
+task: blimp_coordinate_structure_constraint_complex_left_branch
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dbcd6ae9c006dd52b37a252097ab0a038a68d190
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: coordinate_structure_constraint_object_extraction
+include: _template_yaml
+task: blimp_coordinate_structure_constraint_object_extraction
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6c27935e834d8ee21001dc897714c9c6e3b4a390
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: determiner_noun_agreement_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b8c715a7b95de1b1f9b03afdb1001ba9b4e94442
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: determiner_noun_agreement_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c2ab1b6af5c72f76d0826b9725ea651426fc830
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: determiner_noun_agreement_irregular_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_irregular_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..69c77d12e0174676cbdc1c009d1612ffde8e3d42
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: determiner_noun_agreement_irregular_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_irregular_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb8dba60ef1b9aa3a5af3652b86637fe10577116
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: determiner_noun_agreement_with_adj_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..57f12ecade63b595378cb2c9aadf710725e9d4b0
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: determiner_noun_agreement_with_adj_irregular_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_irregular_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6df0e7d52df67c979fb74a440a113addb0c434bf
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: determiner_noun_agreement_with_adj_irregular_2
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adj_irregular_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4512e9176f98a9f2ec3f53de15657b97274809fb
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: determiner_noun_agreement_with_adjective_1
+include: _template_yaml
+task: blimp_determiner_noun_agreement_with_adjective_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16e3c0217ee09d554edbe8210ff6c78375d267a4
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: distractor_agreement_relational_noun
+include: _template_yaml
+task: blimp_distractor_agreement_relational_noun
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9fbc28c51d663932ae558087f28a0333131148bd
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: distractor_agreement_relative_clause
+include: _template_yaml
+task: blimp_distractor_agreement_relative_clause
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db3b1fed109c802774c1ac8e347a931febc89646
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: drop_argument
+include: _template_yaml
+task: blimp_drop_argument
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3686534f3edf83df2c470a7907678db8ebe85abc
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: ellipsis_n_bar_1
+include: _template_yaml
+task: blimp_ellipsis_n_bar_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bac472bdff2f61df39eb2fec55a98c44ca86b702
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: ellipsis_n_bar_2
+include: _template_yaml
+task: blimp_ellipsis_n_bar_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..765596462dce91f51b557fca254deef3a2ee325e
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_object_raising.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: existential_there_object_raising
+include: _template_yaml
+task: blimp_existential_there_object_raising
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..15396ae3acadcada2e12549deeacd66b856d5a69
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: existential_there_quantifiers_1
+include: _template_yaml
+task: blimp_existential_there_quantifiers_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81370693b6be13ce5b187f0954ae45aa7156d9d7
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: existential_there_quantifiers_2
+include: _template_yaml
+task: blimp_existential_there_quantifiers_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45e18aebb660ed759099230686c0e1ae24ea3f86
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/existential_there_subject_raising.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: existential_there_subject_raising
+include: _template_yaml
+task: blimp_existential_there_subject_raising
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ee8d01875cec8b19ae74124fad0e1103c87e480
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/expletive_it_object_raising.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: expletive_it_object_raising
+include: _template_yaml
+task: blimp_expletive_it_object_raising
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/generate_configs.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/generate_configs.py
new file mode 100644
index 0000000000000000000000000000000000000000..a32c366834592041bde8b5fcaf2cc3c821f40f6f
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/generate_configs.py
@@ -0,0 +1,94 @@
+import yaml
+
+
+all_subtasks = [  # BLiMP subtask names; main() writes one "<name>.yaml" config for each
+    "adjunct_island",
+    "anaphor_gender_agreement",
+    "anaphor_number_agreement",
+    "animate_subject_passive",
+    "animate_subject_trans",
+    "causative",
+    "complex_NP_island",
+    "coordinate_structure_constraint_complex_left_branch",
+    "coordinate_structure_constraint_object_extraction",
+    "determiner_noun_agreement_1",
+    "determiner_noun_agreement_2",
+    "determiner_noun_agreement_irregular_1",
+    "determiner_noun_agreement_irregular_2",
+    "determiner_noun_agreement_with_adj_2",
+    "determiner_noun_agreement_with_adj_irregular_1",
+    "determiner_noun_agreement_with_adj_irregular_2",
+    "determiner_noun_agreement_with_adjective_1",
+    "distractor_agreement_relational_noun",
+    "distractor_agreement_relative_clause",
+    "drop_argument",
+    "ellipsis_n_bar_1",
+    "ellipsis_n_bar_2",
+    "existential_there_object_raising",
+    "existential_there_quantifiers_1",
+    "existential_there_quantifiers_2",
+    "existential_there_subject_raising",
+    "expletive_it_object_raising",
+    "inchoative",
+    "intransitive",
+    "irregular_past_participle_adjectives",
+    "irregular_past_participle_verbs",
+    "irregular_plural_subject_verb_agreement_1",
+    "irregular_plural_subject_verb_agreement_2",
+    "left_branch_island_echo_question",
+    "left_branch_island_simple_question",
+    "matrix_question_npi_licensor_present",
+    "npi_present_1",
+    "npi_present_2",
+    "only_npi_licensor_present",
+    "only_npi_scope",
+    "passive_1",
+    "passive_2",
+    "principle_A_c_command",
+    "principle_A_case_1",
+    "principle_A_case_2",
+    "principle_A_domain_1",
+    "principle_A_domain_2",
+    "principle_A_domain_3",
+    "principle_A_reconstruction",
+    "regular_plural_subject_verb_agreement_1",
+    "regular_plural_subject_verb_agreement_2",
+    "sentential_negation_npi_licensor_present",
+    "sentential_negation_npi_scope",
+    "sentential_subject_island",
+    "superlative_quantifiers_1",
+    "superlative_quantifiers_2",
+    "tough_vs_raising_1",
+    "tough_vs_raising_2",
+    "transitive",
+    "wh_island",
+    "wh_questions_object_gap",
+    "wh_questions_subject_gap",
+    "wh_questions_subject_gap_long_distance",
+    "wh_vs_that_no_gap",
+    "wh_vs_that_no_gap_long_distance",
+    "wh_vs_that_with_gap",
+    "wh_vs_that_with_gap_long_distance",
+]
+
+
+def main() -> None:
+    for task in all_subtasks:
+        file_name = f"{task}.yaml"
+        try:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(
+                    {
+                        "include": "_template_yaml",
+                        "task": "blimp_" + task,
+                        "dataset_name": task,
+                    },
+                    f,
+                )
+        except FileExistsError:
+            pass
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d5b7edbdc26833f7ae645889d8642077fd979bc
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: intransitive
+include: _template_yaml
+task: blimp_intransitive
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe9097d6673f9a3d5d05f511f9ea48940f41d44f
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml
@@ -0,0 +1,4 @@
+# Generated by generate_configs.py
+dataset_name: irregular_past_participle_adjectives
+include: _template_yaml
+task: blimp_irregular_past_participle_adjectives
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..906fb347710e46c3159aaee05def45730b30929f
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_past_participle_verbs
+include: _template_yaml
+task: blimp_irregular_past_participle_verbs
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..537c7764f671636cfb781382397f525d0fba305a
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_plural_subject_verb_agreement_1
+include: _template_yaml
+task: blimp_irregular_plural_subject_verb_agreement_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5d3b84fceab0e3907ab6b1bd3e44a0e6c9445416
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: irregular_plural_subject_verb_agreement_2
+include: _template_yaml
+task: blimp_irregular_plural_subject_verb_agreement_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..409e8ccca8a101366a0f881e775a7dcf9ff317b6
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: left_branch_island_echo_question
+include: _template_yaml
+task: blimp_left_branch_island_echo_question
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..214de3c2edb49de48878e6baed1bf725c9728b98
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/left_branch_island_simple_question.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: left_branch_island_simple_question
+include: _template_yaml
+task: blimp_left_branch_island_simple_question
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..712cf4313ee90bc407b86d51c49fcaa3198247f8
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: matrix_question_npi_licensor_present
+include: _template_yaml
+task: blimp_matrix_question_npi_licensor_present
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4031b4cf5f691d24486a144455a06c9f84ca2b86
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: npi_present_1
+include: _template_yaml
+task: blimp_npi_present_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b401a9fce3deefd32f83315f55993739e9c26b3
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: npi_present_2
+include: _template_yaml
+task: blimp_npi_present_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_licensor_present.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_licensor_present.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8dbce62337d39d44aed2f0f14cfd51dec367a42c
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_licensor_present.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: only_npi_licensor_present
+include: _template_yaml
+task: blimp_only_npi_licensor_present
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_scope.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_scope.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4386575f591b9f03cf12f37e04ee8632c4fbec79
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/only_npi_scope.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: only_npi_scope
+include: _template_yaml
+task: blimp_only_npi_scope
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0dd6aca0535d448d9269ae1959063d687955a17f
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: passive_1
+include: _template_yaml
+task: blimp_passive_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_c_command.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_c_command.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b9dfa123588d518f68748cf102dbd72941296059
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_c_command.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: principle_A_c_command
+include: _template_yaml
+task: blimp_principle_A_c_command
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..552f8a1e2423a6a4b7c1ea6a57b10f15fdbdbd1d
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_1.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: principle_A_case_1
+include: _template_yaml
+task: blimp_principle_A_case_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..85aa920a268d5dbc4d7c69df746d4b70e334d206
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_case_2.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: principle_A_case_2
+include: _template_yaml
+task: blimp_principle_A_case_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb06e731c5836934df3cbf8f77b1a768e248271d
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_1.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: principle_A_domain_1
+include: _template_yaml
+task: blimp_principle_A_domain_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec3be9a64d0bb5a408a905ed1b72c0b3eaf603c9
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_2.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: principle_A_domain_2
+include: _template_yaml
+task: blimp_principle_A_domain_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_3.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_3.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e6ff32b71e82396c1ce36632503bd5f12e84d1b8
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_domain_3.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: principle_A_domain_3
+include: _template_yaml
+task: blimp_principle_A_domain_3
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_reconstruction.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_reconstruction.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5e2cdadc34fc0c7c3e14c8ab24ce0d522f7835d0
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/principle_A_reconstruction.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: principle_A_reconstruction
+include: _template_yaml
+task: blimp_principle_A_reconstruction
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d4df1f7216513f772006c5742917f692e827d59
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: regular_plural_subject_verb_agreement_1
+include: _template_yaml
+task: blimp_regular_plural_subject_verb_agreement_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..37cdb781391d0280c96458b6cf8493d65ca00d3c
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: regular_plural_subject_verb_agreement_2
+include: _template_yaml
+task: blimp_regular_plural_subject_verb_agreement_2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..df607e5c79e02ef8b284ce2b458ba5371951fc89
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sentential_negation_npi_licensor_present
+include: _template_yaml
+task: blimp_sentential_negation_npi_licensor_present
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..854d9e5d86e393abbbca986cfebbd6156465f1eb
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sentential_negation_npi_scope
+include: _template_yaml
+task: blimp_sentential_negation_npi_scope
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_subject_island.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_subject_island.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e26341a80a3ffb03e16aa0dc3c10471a4ca4ae3e
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/sentential_subject_island.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sentential_subject_island
+include: _template_yaml
+task: blimp_sentential_subject_island
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c3cf8bfc238feb272c290621c9d55772cb6f5dc4
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: superlative_quantifiers_1
+include: _template_yaml
+task: blimp_superlative_quantifiers_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_1.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7abc4dc28ddb4074bcb2db2f8d706119b1ca08d3
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_1.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: tough_vs_raising_1
+include: _template_yaml
+task: blimp_tough_vs_raising_1
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..18864352a9b1bfdb26c146af8333f9c0dfc4beec
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: transitive
+include: _template_yaml
+task: blimp_transitive
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b665096a09297695eb40f791faeb81b7d9b7f56
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wh_island
+include: _template_yaml
+task: blimp_wh_island
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_object_gap.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_object_gap.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cb78e7b917573f4c8be60508f454a9ddd6e2b668
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_object_gap.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wh_questions_object_gap
+include: _template_yaml
+task: blimp_wh_questions_object_gap
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b956919c455893a0282a7d3842fc57eefe624114
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wh_questions_subject_gap
+include: _template_yaml
+task: blimp_wh_questions_subject_gap
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap_long_distance.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap_long_distance.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34c3e5cf7f141db947d42b945262de6849700d3c
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_questions_subject_gap_long_distance.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wh_questions_subject_gap_long_distance
+include: _template_yaml
+task: blimp_wh_questions_subject_gap_long_distance
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2221ce5fe0f55611003ab554d5f24aafad41bebf
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wh_vs_that_no_gap
+include: _template_yaml
+task: blimp_wh_vs_that_no_gap
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca5af7a576a5ad6f15544cb748f857a549d90295
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wh_vs_that_with_gap
+include: _template_yaml
+task: blimp_wh_vs_that_with_gap
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d38acc5ff3dc2acd9e207d563377ea4933669f40
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wh_vs_that_with_gap_long_distance
+include: _template_yaml
+task: blimp_wh_vs_that_with_gap_long_distance