mrm8488 committed
Commit c32ee7d
1 Parent(s): 6867449

First commit

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. Makefile +16 -0
  2. promptsource/__init__.py +0 -0
  3. promptsource/app.py +585 -0
  4. promptsource/seqio_tasks/__init__.py +3 -0
  5. promptsource/seqio_tasks/dataset_subset_template.csv +445 -0
  6. promptsource/seqio_tasks/experiment_D4.csv +242 -0
  7. promptsource/seqio_tasks/preview_annotated_prompts.py +111 -0
  8. promptsource/seqio_tasks/preview_promptsource.py +105 -0
  9. promptsource/seqio_tasks/tasks.py +421 -0
  10. promptsource/seqio_tasks/utils.py +77 -0
  11. promptsource/session.py +89 -0
  12. promptsource/templates.py +515 -0
  13. promptsource/templates/Zaid/coqa_expanded/templates.yaml +116 -0
  14. promptsource/templates/Zaid/quac_expanded/templates.yaml +79 -0
  15. promptsource/templates/acronym_identification/templates.yaml +219 -0
  16. promptsource/templates/ade_corpus_v2/Ade_corpus_v2_classification/templates.yaml +39 -0
  17. promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/templates.yaml +89 -0
  18. promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_dosage_relation/templates.yaml +82 -0
  19. promptsource/templates/adversarial_qa/adversarialQA/templates.yaml +110 -0
  20. promptsource/templates/adversarial_qa/dbert/templates.yaml +110 -0
  21. promptsource/templates/adversarial_qa/dbidaf/templates.yaml +110 -0
  22. promptsource/templates/adversarial_qa/droberta/templates.yaml +110 -0
  23. promptsource/templates/aeslc/templates.yaml +131 -0
  24. promptsource/templates/ag_news/templates.yaml +94 -0
  25. promptsource/templates/ai2_arc/ARC-Challenge/templates.yaml +130 -0
  26. promptsource/templates/ai2_arc/ARC-Easy/templates.yaml +130 -0
  27. promptsource/templates/amazon_polarity/templates.yaml +174 -0
  28. promptsource/templates/amazon_reviews_multi/en/templates.yaml +85 -0
  29. promptsource/templates/amazon_us_reviews/Wireless_v1_00/templates.yaml +69 -0
  30. promptsource/templates/ambig_qa/light/templates.yaml +94 -0
  31. promptsource/templates/anli/templates.yaml +191 -0
  32. promptsource/templates/app_reviews/templates.yaml +68 -0
  33. promptsource/templates/aqua_rat/raw/templates.yaml +125 -0
  34. promptsource/templates/art/templates.yaml +218 -0
  35. promptsource/templates/asnq/templates.yaml +118 -0
  36. promptsource/templates/asset/ratings/templates.yaml +56 -0
  37. promptsource/templates/asset/simplification/templates.yaml +41 -0
  38. promptsource/templates/banking77/templates.yaml +269 -0
  39. promptsource/templates/billsum/templates.yaml +104 -0
  40. promptsource/templates/bing_coronavirus_query_set/templates.yaml +72 -0
  41. promptsource/templates/blended_skill_talk/templates.yaml +46 -0
  42. promptsource/templates/boolq/templates.yaml +99 -0
  43. promptsource/templates/cbt/CN/templates.yaml +45 -0
  44. promptsource/templates/cbt/NE/templates.yaml +45 -0
  45. promptsource/templates/cbt/P/templates.yaml +45 -0
  46. promptsource/templates/cbt/V/templates.yaml +45 -0
  47. promptsource/templates/cbt/raw/templates.yaml +32 -0
  48. promptsource/templates/cc_news/templates.yaml +208 -0
  49. promptsource/templates/circa/templates.yaml +91 -0
  50. promptsource/templates/climate_fever/templates.yaml +238 -0
Makefile ADDED
@@ -0,0 +1,16 @@
+.PHONY: quality style
+
+check_dirs := promptsource
+
+# Check that source code meets quality standards
+
+quality:
+	black --check --line-length 119 --target-version py38 $(check_dirs)
+	isort --check-only $(check_dirs)
+	flake8 $(check_dirs) --max-line-length 119
+
+# Format source code automatically
+
+style:
+	black --line-length 119 --target-version py38 $(check_dirs)
+	isort $(check_dirs)
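Usage note: with these targets, `make quality` runs the black/isort/flake8 checks without modifying files, and `make style` rewrites `promptsource/` in place; the 119-character line length and the py38 target are pinned by the flags above.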
promptsource/__init__.py ADDED
File without changes
promptsource/app.py ADDED
@@ -0,0 +1,585 @@
+import argparse
+import textwrap
+from multiprocessing import Manager, Pool
+
+import pandas as pd
+import plotly.express as px
+import streamlit as st
+from datasets import get_dataset_infos
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+from pygments.lexers import DjangoLexer
+
+from promptsource.session import _get_state
+from promptsource.templates import Template, TemplateCollection
+from promptsource.utils import (
+    get_dataset,
+    get_dataset_confs,
+    list_datasets,
+    removeHyphen,
+    renameDatasetColumn,
+    render_features,
+)
+
+
+# add an argument for read-only
+# At the moment, streamlit does not handle python script arguments gracefully.
+# Thus, for read-only mode, you have to type one of the below two:
+# streamlit run promptsource/app.py -- -r
+# streamlit run promptsource/app.py -- --read-only
+# Check https://github.com/streamlit/streamlit/issues/337 for more information.
+parser = argparse.ArgumentParser(description="run app.py with args")
+parser.add_argument("-r", "--read-only", action="store_true", help="whether to run it as read-only mode")
+
+args = parser.parse_args()
+if args.read_only:
+    select_options = ["Helicopter view", "Prompted dataset viewer"]
+    side_bar_title_prefix = "Promptsource (Read only)"
+else:
+    select_options = ["Helicopter view", "Prompted dataset viewer", "Sourcing"]
+    side_bar_title_prefix = "Promptsource"
+
+#
+# Helper functions for datasets library
+#
+get_dataset = st.cache(allow_output_mutation=True)(get_dataset)
+get_dataset_confs = st.cache(get_dataset_confs)
+
+
+def reset_template_state():
+    state.template_name = None
+    state.jinja = None
+    state.reference = None
+
+
+#
+# Loads session state
+#
+state = _get_state()
+
+#
+# Initial page setup
+#
+st.set_page_config(page_title="Promptsource", layout="wide")
+st.sidebar.markdown(
+    "<center><a href='https://github.com/bigscience-workshop/promptsource'>💻Github - Promptsource\n\n</a></center>",
+    unsafe_allow_html=True,
+)
+mode = st.sidebar.selectbox(
+    label="Choose a mode",
+    options=select_options,
+    index=0,
+    key="mode_select",
+)
+st.sidebar.title(f"{side_bar_title_prefix} 🌸 - {mode}")
+
+#
+# Adds pygments styles to the page.
+#
+st.markdown(
+    "<style>" + HtmlFormatter(style="friendly").get_style_defs(".highlight") + "</style>", unsafe_allow_html=True
+)
+
+WIDTH = 80
+
+
+def show_jinja(t, width=WIDTH):
+    wrap = textwrap.fill(t, width=width, replace_whitespace=False)
+    out = highlight(wrap, DjangoLexer(), HtmlFormatter())
+    st.write(out, unsafe_allow_html=True)
+
+
+def show_text(t, width=WIDTH, with_markdown=False):
+    wrap = [textwrap.fill(subt, width=width, replace_whitespace=False) for subt in t.split("\n")]
+    wrap = "\n".join(wrap)
+    if with_markdown:
+        st.write(wrap, unsafe_allow_html=True)
+    else:
+        st.text(wrap)
+
+
+#
+# Loads template data
+#
+try:
+    template_collection = TemplateCollection()
+except FileNotFoundError:
+    st.error(
+        "Unable to find the prompt folder!\n\n"
+        "We expect the folder to be in the working directory. "
+        "You might need to restart the app in the root directory of the repo."
+    )
+    st.stop()
+
+
+if mode == "Helicopter view":
+    st.title("High level metrics")
+    st.write(
+        "If you want to contribute, please refer to the instructions in "
+        + "[Contributing](https://github.com/bigscience-workshop/promptsource/blob/main/CONTRIBUTING.md)."
+    )
+
+    #
+    # Global metrics
+    #
+    counts = template_collection.get_templates_count()
+    nb_prompted_datasets = len(counts)
+    st.write(f"## Number of *prompted datasets*: `{nb_prompted_datasets}`")
+    nb_prompts = sum(counts.values())
+    st.write(f"## Number of *prompts*: `{nb_prompts}`")
+
+    #
+    # Metrics per dataset/subset
+    #
+    # Download dataset infos (multiprocessing download)
+    manager = Manager()
+    all_infos = manager.dict()
+    all_datasets = list(set([t[0] for t in template_collection.keys]))
+
+    def get_infos(d_name):
+        all_infos[d_name] = get_dataset_infos(d_name)
+
+    pool = Pool(processes=len(all_datasets))
+    pool.map(get_infos, all_datasets)
+    pool.close()
+    pool.join()
+
+    results = []
+    for (dataset_name, subset_name) in template_collection.keys:
+        # Collect split sizes (train, validation and test)
+        if dataset_name not in all_infos:
+            infos = get_dataset_infos(dataset_name)
+            all_infos[dataset_name] = infos
+        else:
+            infos = all_infos[dataset_name]
+        if infos:
+            if subset_name is None:
+                subset_infos = infos[list(infos.keys())[0]]
+            else:
+                subset_infos = infos[subset_name]
+
+            split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
+        else:
+            # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json
+            # so infos is an empty dict, and `infos[list(infos.keys())[0]]` raises an error
+            # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
+            split_sizes = {}
+
+        # Collect template counts, original task counts and names
+        dataset_templates = template_collection.get_dataset(dataset_name, subset_name)
+        results.append(
+            {
+                "Dataset name": dataset_name,
+                "Subset name": "∅" if subset_name is None else subset_name,
+                "Train size": split_sizes["train"] if "train" in split_sizes else 0,
+                "Validation size": split_sizes["validation"] if "validation" in split_sizes else 0,
+                "Test size": split_sizes["test"] if "test" in split_sizes else 0,
+                "Number of prompts": len(dataset_templates),
+                "Number of original task prompts": sum(
+                    [bool(t.metadata.original_task) for t in dataset_templates.templates.values()]
+                ),
+                "Prompt names": [t.name for t in dataset_templates.templates.values()],
+            }
+        )
+    results_df = pd.DataFrame(results)
+    results_df.sort_values(["Number of prompts"], inplace=True, ascending=False)
+    results_df.reset_index(drop=True, inplace=True)
+
+    nb_training_instances = results_df["Train size"].sum()
+    st.write(f"## Number of *training instances*: `{nb_training_instances}`")
+
+    plot_df = results_df[["Dataset name", "Subset name", "Train size", "Number of prompts"]].copy()
+    plot_df["Name"] = plot_df["Dataset name"] + " - " + plot_df["Subset name"]
+    plot_df.sort_values(["Train size"], inplace=True, ascending=False)
+    fig = px.bar(
+        plot_df,
+        x="Name",
+        y="Train size",
+        hover_data=["Dataset name", "Subset name", "Number of prompts"],
+        log_y=True,
+        title="Number of training instances per data(sub)set - y-axis is in logscale",
+    )
+    fig.update_xaxes(visible=False, showticklabels=False)
+    st.plotly_chart(fig, use_container_width=True)
+    st.write(
+        f"- Top 3 training subsets account for `{100*plot_df[:3]['Train size'].sum()/nb_training_instances:.2f}%` of the training instances."
+    )
+    biggest_training_subset = plot_df.iloc[0]
+    st.write(
+        f"- Biggest training subset is *{biggest_training_subset['Name']}* with `{biggest_training_subset['Train size']}` instances"
+    )
+    smallest_training_subset = plot_df[plot_df["Train size"] > 0].iloc[-1]
+    st.write(
+        f"- Smallest training subset is *{smallest_training_subset['Name']}* with `{smallest_training_subset['Train size']}` instances"
+    )
+
+    st.markdown("***")
+    st.write("Details per dataset")
+    st.table(results_df)
+
+else:
+    # Combining mode `Prompted dataset viewer` and `Sourcing` since the
+    # backbone of the interfaces is the same
+    assert mode in ["Prompted dataset viewer", "Sourcing"], ValueError(
+        f"`mode` ({mode}) should be in `[Helicopter view, Prompted dataset viewer, Sourcing]`"
+    )
+
+    #
+    # Loads dataset information
+    #
+
+    dataset_list = list_datasets(
+        template_collection,
+        state,
+    )
+    ag_news_index = dataset_list.index("ag_news")
+
+    #
+    # Select a dataset - starts with ag_news
+    #
+    dataset_key = st.sidebar.selectbox(
+        "Dataset",
+        dataset_list,
+        key="dataset_select",
+        index=ag_news_index,
+        help="Select the dataset to work on.",
+    )
+
+    #
+    # If a particular dataset is selected, loads dataset and template information
+    #
+    if dataset_key is not None:
+
+        #
+        # Check for subconfigurations (i.e. subsets)
+        #
+        configs = get_dataset_confs(dataset_key)
+        conf_option = None
+        if len(configs) > 0:
+            conf_option = st.sidebar.selectbox("Subset", configs, index=0, format_func=lambda a: a.name)
+
+        dataset = get_dataset(dataset_key, str(conf_option.name) if conf_option else None)
+        splits = list(dataset.keys())
+        index = 0
+        if "train" in splits:
+            index = splits.index("train")
+        split = st.sidebar.selectbox("Split", splits, key="split_select", index=index)
+        dataset = dataset[split]
+        dataset = renameDatasetColumn(dataset)
+
+        dataset_templates = template_collection.get_dataset(dataset_key, conf_option.name if conf_option else None)
+
+        template_list = dataset_templates.all_template_names
+        num_templates = len(template_list)
+        st.sidebar.write(
+            "No of prompts created for "
+            + f"`{dataset_key + (('/' + conf_option.name) if conf_option else '')}`"
+            + f": **{str(num_templates)}**"
+        )
+
+        if mode == "Prompted dataset viewer":
+            if num_templates > 0:
+                template_name = st.sidebar.selectbox(
+                    "Prompt name",
+                    template_list,
+                    key="template_select",
+                    index=0,
+                    help="Select the prompt to visualize.",
+                )
+
+            step = 50
+            example_index = st.sidebar.number_input(
+                f"Select the example index (Size = {len(dataset)})",
+                min_value=0,
+                max_value=len(dataset) - step,
+                value=0,
+                step=step,
+                key="example_index_number_input",
+                help="Offset = 50.",
+            )
+        else:  # mode = Sourcing
+            st.sidebar.subheader("Select Example")
+            example_index = st.sidebar.slider("Select the example index", 0, len(dataset) - 1)
+
+        example = dataset[example_index]
+        example = removeHyphen(example)
+
+        st.sidebar.write(example)
+
+        st.sidebar.subheader("Dataset Schema")
+        rendered_features = render_features(dataset.features)
+        st.sidebar.write(rendered_features)
+
+        #
+        # Display dataset information
+        #
+        st.header("Dataset: " + dataset_key + " " + (("/ " + conf_option.name) if conf_option else ""))
+
+        st.markdown(
+            "*Homepage*: "
+            + dataset.info.homepage
+            + "\n\n*Dataset*: https://github.com/huggingface/datasets/blob/master/datasets/%s/%s.py"
+            % (dataset_key, dataset_key)
+        )
+
+        md = """
+        %s
+        """ % (
+            dataset.info.description.replace("\\", "") if dataset_key else ""
+        )
+        st.markdown(md)
+
+        #
+        # Body of the app: display prompted examples in mode `Prompted dataset viewer`
+        # or text boxes to create new prompts in mode `Sourcing`
+        #
+        if mode == "Prompted dataset viewer":
+            #
+            # Display template information
+            #
+            if num_templates > 0:
+                template = dataset_templates[template_name]
+                st.subheader("Prompt")
+                st.markdown("##### Name")
+                st.text(template.name)
+                st.markdown("##### Reference")
+                st.text(template.reference)
+                st.markdown("##### Original Task? ")
+                st.text(template.metadata.original_task)
+                st.markdown("##### Choices in template? ")
+                st.text(template.metadata.choices_in_prompt)
+                st.markdown("##### Metrics")
+                st.text(", ".join(template.metadata.metrics) if template.metadata.metrics else None)
+                st.markdown("##### Answer Choices")
+                if template.get_answer_choices_expr() is not None:
+                    show_jinja(template.get_answer_choices_expr())
+                else:
+                    st.text(None)
+                st.markdown("##### Jinja template")
+                splitted_template = template.jinja.split("|||")
+                st.markdown("###### Input template")
+                show_jinja(splitted_template[0].strip())
+                if len(splitted_template) > 1:
+                    st.markdown("###### Target template")
+                    show_jinja(splitted_template[1].strip())
+                st.markdown("***")
+
+            #
+            # Display a couple (steps) examples
+            #
+            for ex_idx in range(example_index, example_index + step):
+                if ex_idx >= len(dataset):
+                    continue
+                example = dataset[ex_idx]
+                example = removeHyphen(example)
+                col1, _, col2 = st.beta_columns([12, 1, 12])
+                with col1:
+                    st.write(example)
+                if num_templates > 0:
+                    with col2:
+                        prompt = template.apply(example, highlight_variables=False)
+                        if prompt == [""]:
+                            st.write("∅∅∅ *Blank result*")
+                        else:
+                            st.write("Input")
+                            show_text(prompt[0])
+                            if len(prompt) > 1:
+                                st.write("Target")
+                                show_text(prompt[1])
+                st.markdown("***")
+        else:  # mode = Sourcing
+            st.markdown("## Prompt Creator")
+
+            #
+            # Create a new template or select an existing one
+            #
+            col1a, col1b, _, col2 = st.beta_columns([9, 9, 1, 6])
+
+            # current_templates_key and state.templates_key are keys for the templates object
+            current_templates_key = (dataset_key, conf_option.name if conf_option else None)
+
+            # Resets state if there has been a change in templates_key
+            if state.templates_key != current_templates_key:
+                state.templates_key = current_templates_key
+                reset_template_state()
+
+            with col1a, st.form("new_template_form"):
+                new_template_name = st.text_input(
+                    "Create a New Prompt",
+                    key="new_template",
+                    value="",
+                    help="Enter name and hit enter to create a new prompt.",
+                )
+                new_template_submitted = st.form_submit_button("Create")
+                if new_template_submitted:
+                    if new_template_name in dataset_templates.all_template_names:
+                        st.error(
+                            f"A prompt with the name {new_template_name} already exists "
+                            f"for dataset {state.templates_key}."
+                        )
+                    elif new_template_name == "":
+                        st.error("Need to provide a prompt name.")
+                    else:
+                        template = Template(new_template_name, "", "")
+                        dataset_templates.add_template(template)
+                        reset_template_state()
+                        state.template_name = new_template_name
+                else:
+                    state.new_template_name = None
+
+            with col1b, st.beta_expander("or Select Prompt", expanded=True):
+                dataset_templates = template_collection.get_dataset(*state.templates_key)
+                template_list = dataset_templates.all_template_names
+                if state.template_name:
+                    index = template_list.index(state.template_name)
+                else:
+                    index = 0
+                state.template_name = st.selectbox(
+                    "", template_list, key="template_select", index=index, help="Select the prompt to work on."
+                )
+
+                if st.button("Delete Prompt", key="delete_prompt"):
+                    dataset_templates.remove_template(state.template_name)
+                    reset_template_state()
+
+            variety_guideline = """
+            :heavy_exclamation_mark::question:Creating a diverse set of prompts whose differences go beyond surface wordings (i.e. marginally changing 2 or 3 words) is highly encouraged.
+            Ultimately, the hope is that exposing the model to such a diversity will have a non-trivial impact on the model's robustness to the prompt formulation.
+            \r**To get various prompts, you can try moving the cursor along these axes**:
+            \n- **Interrogative vs affirmative form**: Ask a question about an attribute of the inputs or tell the model to decide something about the input.
+            \n- **Task description localization**: where is the task description blended with the inputs? In the beginning, in the middle, at the end?
+            \n- **Implicit situation or contextualization**: how explicit is the query? For instance, *Given this review, would you buy this product?* is an indirect way to ask whether the review is positive.
+            """
+
+            col1, _, _ = st.beta_columns([18, 1, 6])
+            with col1:
+                if state.template_name is not None:
+                    show_text(variety_guideline, with_markdown=True)
+
+            #
+            # Edit the created or selected template
+            #
+            col1, _, col2 = st.beta_columns([18, 1, 6])
+            with col1:
+                if state.template_name is not None:
+                    template = dataset_templates[state.template_name]
+                    #
+                    # If template is selected, displays template editor
+                    #
+                    with st.form("edit_template_form"):
+                        updated_template_name = st.text_input("Name", value=template.name)
+                        state.reference = st.text_input(
+                            "Prompt Reference",
+                            help="Short description of the prompt and/or paper reference for the prompt.",
+                            value=template.reference,
+                        )
+
+                        # Metadata
+                        state.metadata = template.metadata
+                        state.metadata.original_task = st.checkbox(
+                            "Original Task?",
+                            value=template.metadata.original_task,
+                            help="Prompt asks model to perform the original task designed for this dataset.",
+                        )
+                        state.metadata.choices_in_prompt = st.checkbox(
+                            "Choices in Template?",
+                            value=template.metadata.choices_in_prompt,
+                            help="Prompt explicitly lists choices in the template for the output.",
+                        )
+
+                        # Metrics from here:
+                        # https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py
+                        metrics_choices = [
+                            "BLEU",
+                            "ROUGE",
+                            "Squad",
+                            "Trivia QA",
+                            "Accuracy",
+                            "Pearson Correlation",
+                            "Spearman Correlation",
+                            "MultiRC",
+                            "AUC",
+                            "COQA F1",
+                            "Edit Distance",
+                        ]
+                        # Add mean reciprocal rank
+                        metrics_choices.append("Mean Reciprocal Rank")
+                        # Add generic other
+                        metrics_choices.append("Other")
+                        # Sort alphabetically
+                        metrics_choices = sorted(metrics_choices)
+                        state.metadata.metrics = st.multiselect(
+                            "Metrics",
+                            metrics_choices,
+                            default=template.metadata.metrics,
+                            help="Select all metrics that are commonly used (or should "
+                            "be used if a new task) to evaluate this prompt.",
+                        )
+
+                        # Answer choices
+                        if template.get_answer_choices_expr() is not None:
+                            answer_choices = template.get_answer_choices_expr()
+                        else:
+                            answer_choices = ""
+                        state.answer_choices = st.text_input(
+                            "Answer Choices",
+                            value=answer_choices,
+                            help="A Jinja expression for computing answer choices. "
+                            "Separate choices with a triple bar (|||).",
+                        )
+
+                        # Jinja
+                        state.jinja = st.text_area("Template", height=40, value=template.jinja)
+
+                        # Submit form
+                        if st.form_submit_button("Save"):
+                            if (
+                                updated_template_name in dataset_templates.all_template_names
+                                and updated_template_name != state.template_name
+                            ):
+                                st.error(
+                                    f"A prompt with the name {updated_template_name} already exists "
+                                    f"for dataset {state.templates_key}."
+                                )
+                            elif updated_template_name == "":
+                                st.error("Need to provide a prompt name.")
+                            else:
+                                # Parses state.answer_choices
+                                if state.answer_choices == "":
+                                    updated_answer_choices = None
+                                else:
+                                    updated_answer_choices = state.answer_choices
+
+                                dataset_templates.update_template(
+                                    state.template_name,
+                                    updated_template_name,
+                                    state.jinja,
+                                    state.reference,
+                                    state.metadata,
+                                    updated_answer_choices,
+                                )
+                                # Update the state as well
+                                state.template_name = updated_template_name
+            #
+            # Displays template output on current example if a template is selected
+            # (in second column)
+            #
+            with col2:
+                if state.template_name is not None:
+                    st.empty()
+                    template = dataset_templates[state.template_name]
+                    prompt = template.apply(example)
+                    if prompt == [""]:
+                        st.write("∅∅∅ *Blank result*")
+                    else:
+                        st.write("Input")
+                        show_text(prompt[0], width=40)
+                        if len(prompt) > 1:
+                            st.write("Target")
+                            show_text(prompt[1], width=40)
+
+
+#
+# Must sync state at end
+#
+state.sync()
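For reference, the same template machinery can be exercised outside Streamlit. A minimal sketch using only the calls made above (TemplateCollection, get_dataset, all_template_names, Template.apply); "ag_news" matches the app's default dataset, and picking the first template name is purely illustrative:

    # Sketch: load a prompt template and apply it to one example, mirroring app.py.
    from datasets import load_dataset
    from promptsource.templates import TemplateCollection

    collection = TemplateCollection()
    dataset_templates = collection.get_dataset("ag_news", None)  # (dataset, subset)
    print(dataset_templates.all_template_names)

    template = dataset_templates[dataset_templates.all_template_names[0]]
    example = load_dataset("ag_news", split="train")[0]
    prompt = template.apply(example)  # input text, plus target if the Jinja contains "|||"
    print("Input:", prompt[0])
    if len(prompt) > 1:
        print("Target:", prompt[1])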
promptsource/seqio_tasks/__init__.py ADDED
@@ -0,0 +1,3 @@
+"""Tools for loading prompted tasks in seqio."""
+
+from . import tasks, utils
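A minimal sketch of how this package is meant to be consumed, assuming (as the side-effect import of `tasks` suggests) that importing it registers the prompted tasks with seqio's global TaskRegistry:

    # Sketch: importing the package should register the tasks; then list what exists.
    import seqio
    import promptsource.seqio_tasks  # noqa: F401 -- registration happens on import

    print(sorted(seqio.TaskRegistry.names())[:10])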
promptsource/seqio_tasks/dataset_subset_template.csv ADDED
@@ -0,0 +1,445 @@
+comment,do_eval,skip_train,dataset_subset_template,nontrivial_choices_given,nontrivial_choices_hidden,trivial_choices_given,trivial_choices_hidden,generative_non_true_task,generative_non_true_implausible,generative_true_task,negated_answers,counting,non_true_task_other,awkward_phrasing,ungrammatical,template_bug,long_distance,no_sep_2_sentences,verbose,answer_span_indices,non_natural_language
+,,,adversarial_qa_dbert_adversarial_qa_dbert_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_dbert_adversarial_qa_dbert_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_9,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_9,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_droberta_adversarial_qa_droberta_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_9,,,,,,,,,,,,,,,,,True,
+,,,ag_news_classify,,True,,,,,,,,,,,,,,,,
+,,,ag_news_classify_with_choices,True,,,,,,,,,,,,,,,,,
+,,,ag_news_recommend,True,,,,,,,,,,,,,,,,,
+,,,ag_news_which_section,,True,,,,,,,,,,,,,,,,
+,,,ag_news_which_section_choices,True,,,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_1,,,True,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_2,,,,True,,,,,,,,,,True,,,,
+,,,amazon_polarity_Template_3,,,,True,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_4,,,,True,,,,,,,,,,True,,,,
+,,,amazon_polarity_Template_5,,,True,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_6,,,True,,,,,,,,,,,True,,,,
+,True,True,anli_GPT_3_style_r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r1,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_GPT_3_style_r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r2,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_GPT_3_style_r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r3,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r3,True,,,,,,,,,,,,,,,,,
+,,,app_reviews_categorize_rating_using_review,,True,,,,,,,,,,,,,,,,
+,,,app_reviews_convert_to_rating,True,,,,,,,,,,,,,,,,,
+,,,app_reviews_convert_to_star_rating,,,,,,,,,,True,,,,,,,,
+,,,app_reviews_generate_review,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_answer_qn,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_false,,,,,,,,True,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_qa_options,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_test,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_answer_qn,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_false,,,,,,,,True,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_qa_options,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_test,True,,,,,,,,,,,,,,,,,
+,True,,circa_goldstandard1_judgement,True,,,,,,,,,,True,,,,,,,
+,True,,circa_goldstandard2_judgement,True,,,,,,,,,,True,,,,,,,
+,,,circa_judgement,,True,,,,,,,,True,True,,,,,,,
+,,,circa_possible_qn,,,,,True,,,,,,,,,,,,,
+,,,circa_question_declarative,,,,,,,,,,True,,,,,,,,
+,,,cnn_dailymail_3.0.0_generate_story,,,,,True,,,,,,,,,,,,,
+,,,cnn_dailymail_3.0.0_news_card_view,,,,,,,True,,,,,,,True,,,,
+,,,cnn_dailymail_3.0.0_news_stock,,,,,,,True,,,,,,,True,,,,
+,,,cnn_dailymail_3.0.0_news_summary,,,,,,,True,,,,,,,True,,True,,
+,,,cnn_dailymail_3.0.0_spice_up_story,,,,,True,,,,,,,,,,,,,
+,,,codah_codah_answer_no_option,,True,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option_idx,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option_post,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_choose_from_list,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_from_the_list,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_from_the_list_post,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_pre,,True,,,,,,,,,,,,,,,,
+,,,codah_codah_question_category,,,,,,,,,,True,,,,,,,,
+,,,codah_codah_question_category_bis,,,,,,,,,,True,,,,,,,,
+,,,common_gen_Example_prompt,,,,,,,True,,,,,,,,,,,
+,,,common_gen_Given_concepts,,,,,,,True,,,,,,,,,,,
+,,,common_gen_Put_together,,,,,,,True,,,,,,,,,,,
+,,,common_gen_choice_in_concept_centric_sentence_generation,,,,,,,True,,,,,,,,,,,
+,,,common_gen_sentence_to_concepts,,,,,,,,,,True,,,,,,,,
+,,,cos_e_v1.11_description_question_option_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_description_question_option_text,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_generate_explanation_given_text,True,,,,,,True,,,,,,True,,,,,
+,,,cos_e_v1.11_generate_explanation_no_given_answer,,True,,,,,True,,,,,,,,,,,
+,,,cos_e_v1.11_question_description_option_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_description_option_text,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_option_description_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_option_description_text,True,,,,,,,,,,,,,,,,,
+revisit,,,cosmos_qa_context_description_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_description_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_description_question_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_answer_description_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_answer_description_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_no_prompt_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_no_prompt_text,True,,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_1,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_10,True,,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_3,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_5,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_7,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_8,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_9,True,,,,,,,,,,,,,,,,,
+,,,dream_answer_to_dialogue,,,,,True,,,,,,,,,,,,,
+,,,dream_baseline,True,,,,,,,,,,,,,,,,,
+,,,dream_conversation,True,,,,,,,,,,,,,,,,,
+,,,dream_generate_first_utterance,,,,,True,,,,,,,,,,,,,
+,,,dream_generate_last_utterance,,,,,True,,,,,,,,,,,,,
+,True,,emo_feeling,True,,,,,,,,,,,,,,,,,
+,True,,emo_final_message,True,,,,,,,,,,,,,,,,,
+,True,,emo_persons_describe,True,,,,,,,,,,,,,,,True,,
+,True,,emo_persons_infer,True,,,,,,,,,,,,,,,,,
+,True,,emo_spoke_last,True,,,,,,,,,,,,,,,,,
+,,,freebase_qa_inference_chain_prompt,,,,,,,,,,True,,,,,,,,
+,,,freebase_qa_inference_chain_prompt_context,,,,,,,,,,True,,,,,,,,
+,,,freebase_qa_qa_context_1,,,,,,,,,,,,,,,,,,
+,,,freebase_qa_qa_context_2,,,,,,,,,,,,,,,,,,
+,,,freebase_qa_qa_template_basic,,,,,,,,,,,,,,,,,,
+,,,gigaword_Document_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_Summarize_this_document_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_TLDR,,,,,,,True,,,,,,,,,,,
+,,,gigaword_generate_summary_for_this,,,,,,,True,,,,,,,,,,,
+,,,gigaword_in_a_nutshell,,,,,,,True,,,,,,,,,,,
+,,,gigaword_reverse_writing,,,,,,,,,,True,,,,,,,,
+,,,gigaword_reverse_writing_2,,,,,,,True,,,,,,,,,,,
+,,,gigaword_summarize_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_write_one_sentence,,,,,,,True,,,,,,,,,,,
+,True,True,glue_cola_Following_sentence_acceptable,True,,,,,,,,,,,,,,,,,
+,True,True,glue_cola_Make_sense_yes_no,,,True,,,,,,,,,,,,,,,
+,True,True,glue_cola_Previous_sentence_acceptable,,,,True,,,,,,,,,,,,,,
+,True,True,glue_cola_editing,,,True,,,,,,,,,,,,,,,
+,True,True,glue_cola_jinja_example,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_equivalent,True,,,,,,,,,,,,,,True,,,
+,True,,glue_mrpc_paraphrase,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_replace,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_same_thing,,,,True,,,,,,,,,,,True,,,
+,True,,glue_mrpc_want_to_know,,,,True,,,,,,,,,,,True,,,
+,,,glue_qqp_answer,,,,True,,,,,,,,,,,,,,
+,,,glue_qqp_duplicate,,,,True,,,,,,,,,,,,,,
+,,,glue_qqp_duplicate_or_not,True,,,,,,,,,,,,,,,,,
+,,,glue_qqp_quora,,,,True,,,,,,,,,,,,True,,
+,,,glue_qqp_same_thing,,,,True,,,,,,,,,,,,,,
+,,,glue_sst2_following_positive_negative,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_happy_or_mad,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_positive_negative_after,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_review,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_said,True,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_examples,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_rank,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_rate,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_score,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_similarity,,,,,,,,,,,,,,,,,,
+,True,True,hans_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,hans_Suppose_Can_we_infer_that_,,,,True,,,,,,,,,,,,,,
+,True,True,hans_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,True,hans_does_S1_entail_S2_,,,True,,,,,,,,,,,,,,,
+,True,True,hans_given_does_it_follow_that_,,,True,,,,,,,,,,,,,,,
+,True,True,hans__does_the_previous_passage_support_the_claim_that,,,,True,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_0,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_1,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_2,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_3,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_0,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_1,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_2,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_3,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_complete_first_then,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_first_then,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_how_ends,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_if_begins_how_continues,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_which_ending,True,,,,,,,,,,,,,,,,,
+,,,imdb_imdb_1,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_2,,True,,,,,,True,,,,,,,,,,
+,,,imdb_imdb_3,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_4,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_5,,True,,,,,,,,,,,,True,,,,
+,,,imdb_imdb_6,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_7,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_8,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_9,,,,True,,,,,,,,,,,,,,
+,True,,mc_taco_mc_taco_1,,,,True,,,,,,,,,,,,,,
+,,,mc_taco_mc_taco_2,,,,,,,,,,True,,,,,,,,
+,True,,mc_taco_mc_taco_3,,,True,,,,,,,,,,,True,,,,
+,,,mc_taco_mc_taco_4,True,,,,,,,,,True,,,,,,,,
+,,,mc_taco_mc_taco_5,,,,,True,,,,,,,,,,,,,
+,,,mc_taco_mc_taco_6,,True,,,,,,,,,,,,,,,,
+,True,True,nq_open_context_self_description,,,,,,,,,,,,,,,,,,
+,,True,nq_open_guess_question,,,,,True,,,,,,,,,,,,,
+,True,True,nq_open_question_answer,,,,,,,,,,,,,,,,,,
+,True,True,nq_open_question_with_instruction,,,,,,,,,,,,,,,,,,
+,,,onestop_english_ara_context,True,,,,,,,,,,,,,,,,,
+,,,onestop_english_assess,True,,,,,,,,,,,,,True,,,,
+,,,onestop_english_ats,True,,,,,,,,,,,,,,,,,
+,,,onestop_english_esl_context,True,,,,,,,,,,,,,True,,,,
+,,,onestop_english_esl_variation,True,,,,,,,,,,,,,True,,,,
+,True,,openbookqa_main_choices,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_choose_an_answer_with_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_only_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_pick_answer_with_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_pick_using_id,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_which_correct,True,,,,,,,,,,,,,,,,,
+,,True,openbookqa_main_which_correct_inverse,True,,,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Concatenation,,,True,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Concatenation_no_label,,,,True,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Meaning,,,True,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Meaning_no_label,,,,True,,,,,,,,,True,,,,,
+,,,paws_labeled_final_PAWS_ANLI_GPT3,True,,,,,,,,,True,,,,,,,,
+,,,paws_labeled_final_PAWS_ANLI_GPT3_no_label,,True,,,,,,,,True,,,,,,,,
+,,,piqa_Correct_the_solution,,,,,True,,,,,,,,,,,,,
+,,,piqa_Correct_the_solution_if_false_from_sol_1,,,,,True,,,,,,,,,,,,,
+,,,piqa_Correct_the_solution_if_false_from_sol_2,,,,,True,,,,,,,,,,,,,
+should use jinja choice,,,piqa_Does_this_solution_make_sense_sol1,,,,True,,,,,,,,,,,,,,
+,,,piqa_Does_this_solution_make_sense_sol2,,,,True,,,,,,,,,,,,,,
+,,,piqa_Generate_a_similar_but_wrong_solution,,,,,True,,,,,,,,,,,,,
+,,,piqa_choose_the_most_appropriate_solution,True,,,,,,,,,,,,,,,,,
+duplicate of above,,True,piqa_choose_the_most_appropriate_solution_reorder_solution,True,,,,,,,,,,,,,,,,,
+,,,piqa_no_prompt_needed,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_aq,,,,,True,True,,,,,,,,,,,,
+,,,qa_srl_context_answer,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_context_qn,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_predicate,,,,,,,,,,True,,,,,,,,
+need non-naive metric,True,,qa_srl_qa,,,,,,,,,,,,,,,,,,
+,,,qasc_is_correct_0,,,,True,,,,,,,,,,,,,,
+,,,qasc_is_correct_1,,,,True,,,,,,,,,,,,,,
+,,,qasc_qu_combined,True,,,,,,,,,,,,,,,,,
+,,,qasc_sep_combined_can_tell,True,,,,,,,,,,,,,,,,,
+,,,qasc_sep_qu,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_text,,True,,,,,,,,,,,,,,,,
+,,,quail_context_question_answer_description_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_answer_description_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_answer_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_text,True,,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_answer_id,,True,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_text,,True,,,,,,,,,,,,,,,,
+,,,quail_no_prompt_id,True,,,,,,,,,,,,,,,,,
+,,,quail_no_prompt_text,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_1,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_para_question_1_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_2,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_3_choices,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_4_choices,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_plain,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_para_question_plain_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_1,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_question_para_1_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_2,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_3,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_question_para_3_reverse,True,,,,,,,,,,,,,,,,,
+,,,quoref_Template_1,,,,,,,,,,,,,,,,,,
+,,,quoref_Template_2,,,,,,,,,,,,,,True,,,,
+,,,quoref_Template_3,,,,,True,,,,,,True,,,,,,,
+,,,quoref_Template_4,,,,,,,,,,True,,,,,,,True,
+,,,quoref_Template_5,,,,,,,,,,True,,,,,,,,
+,,,race_high_Read_the_article_and_answer_the_question_no_option_,,True,,,,,,,,,,,,,,,,
+,True,,race_high_Read_the_article_and_select_the_best_answer,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_high_Read_the_article_and_select_the_best_answer2,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_high_Read_the_article_and_select_the_best_answer3,True,,,,,,,,,,,,,,,,,
+,,,race_high_Write_a_multi_choice_question_for_the_following_article,,,,,True,,,,,,,,,,,,,
+,,,race_high_Write_a_multi_choice_question_for_the_following_article_2,,,,,True,,,,,,,,,,,,,
+,,,race_middle_Read_the_article_and_answer_the_question_no_option_,,True,,,,,,,,,,,,,,,,
+,True,,race_middle_Read_the_article_and_select_the_best_answer,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_middle_Read_the_article_and_select_the_best_answer2,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_middle_Read_the_article_and_select_the_best_answer3,True,,,,,,,,,,,,,,,,,
+,,,race_middle_Write_a_multi_choice_question_for_the_following_article,,,,,True,,,,,,,,,,,,,
+,,,race_middle_Write_a_multi_choice_question_for_the_following_article_2,,,,,True,,,,,,,,,,,,,
+,,,ropes_funky_prompt,True,,,,,,,,,,,,,,,,,
+,,,ropes_plain,True,,,,,,,,,,,,,,,,,
+,,,ropes_plain_bottom_hint,True,,,,,,,,,,,,,True,,,,
+,,,ropes_plain_no_background,True,,,,,,,,,True,,,,,,,,
+,,,ropes_prompt_beginning,True,,,,,,,,,,,,,,,,,
+,,,ropes_prompt_bottom_hint_beginning,True,,,,,,,,,,,,,,,,,
+,,,ropes_prompt_bottom_no_hint,True,,,,,,,,,True,,,,,,,,
+,,,ropes_prompt_mix,True,,,,,,,,,,,,,True,,,,
+,,,rotten_tomatoes_rt_1,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_10,True,,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_2,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_3,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_4,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_5,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_6,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_7,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_8,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_9,,,,True,,,,,,,,,,,,,,
+,,,sciq_Template_0,,True,,,,,,,,,,,True,,,,,
+,,,sciq_Template_1,,True,,,,,,,,,,,True,,,,,
+,True,,social_i_qa_social_i_qa1,True,,,,,,,,,,,,,,,,,
+,,,social_i_qa_social_i_qa2,,True,,,,,,,,,,,,,,,,
+select answer by ordinal word,True,,social_i_qa_social_i_qa3,True,,,,,,,,,,,,,,,,,
+,,,social_i_qa_social_i_qa4,,,,,True,,,,,,,,,,,,,
+4-way to binary classification,,,social_i_qa_social_i_qa5,,,,True,,,,,,,,,,,,,,
+,,,squad_v2_Jeopardy_with_Context,,,,,True,,,,,,,,,,,,,
+,,,squad_v2_Jeopardy_without_Context,,,,,True,,,,,True,,,,,,,,
+,,,squad_v2_Questions_with_Context,True,,,,,,,,,,,,,,,,,
+nicely randomnized prompt phrasing,,,squad_v2_Questions_with_Context_Without_Prompt_Keywords,True,,,,,,,,,,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context_with_randomized_prompt_options,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context_with_randomized_prompt_options_placed_in_the_end,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Question_and_Answer_Pair,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Trivia,,,,,,,,,,True,,,,,,,,
+,True,,super_glue_boolq_GPT_3_Style,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_I_wonder_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_based_on_the_following_passage,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_could_you_tell_me_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_cb_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_cb_based_on_the_previous_passage,True,,,,,,,,,,,,,,,,,
+contrapositive,True,True,super_glue_cb_does_S1_contradict_S2_,True,,,,,,,,,True,,,,,,,,
+,True,True,super_glue_cb_does_S1_entail_S2_,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_cb_given_does_it_follow_that_,True,,,,,,,,,,,,,,,,,
+must/might/may be true,True,True,super_glue_cb_given_it_must_be_true_that_,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_copa_C1_or_C2_premise_so_because_,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__As_a_result_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__What_could_happen_next_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+cause examples,True,,super_glue_copa__which_may_be_caused_by,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__which_may_cause_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+cause examples,True,,super_glue_copa__why_C1_or_C2,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_multirc_I_was_going_to_say_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_Would_it_be_good_to_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_is_a_correct_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_is_the_correct_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_paragraph_question_is_it_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_record_Can_you_figure_out_,,True,,,,,,,,,,,,,,,,
+,True,,super_glue_record_In_the_question_above_the_placeholder_stands_for,,True,,,,,,,,,,,,,,,,
+,True,,super_glue_record_What_could_the_placeholder_be_,True,,,,,,,,,,,,,,,,,
+no difference here?,True,,super_glue_record_Which_one_is_the_placeholder_,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_record_the_placeholder_refers_to_,,True,,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_Suppose_Can_we_infer_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte_does_S1_entail_S2_,,,True,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_given_does_it_follow_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte__Therefore_we_re_licensed_to_say_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte__does_the_previous_passage_support_the_claim_that,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_wic_GPT_3_prompt,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_GPT_3_prompt_with_label,,,True,,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context_meaning,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context_meaning_with_label,,,True,,,,,,,,,,,,True,,,
+,True,,super_glue_wic_similar_sense,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wsc.fixed_Here_p_stands_for_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_In_the_previous_sentence_the_pronoun_refers_to_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_Who_is_are_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_in_the_passage_above_the_pronoun_X_refers_to_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_passage_what_does_the_pronoun_refer_to_,,,,,,,,,,,,,,,,,,
+cast 4-way classification as binary,,,swag_regular_YesNo_0,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_1,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_2,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_3,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_0,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_1,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_2,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_3,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_complete_first_then,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_first_then,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_how_ends,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_if_begins_how_continues,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_which_ending,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ABBR,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ABBR_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_DESC,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_DESC_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ENTY,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ENTY_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_HUM,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_HUM_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_LOC,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_LOC_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_NUM,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_NUM_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_open,,True,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_open_context_first,,True,,,,,,,,,,,,,,,,
+answers are not what the questions ask for,,True,trec_gao_et_al_1,,,,,,,,,,,,True,,,,,,
+answers are not what the questions ask for,,True,trec_gao_et_al_2,,,,,,,,,,,,True,,,,,,
+,,,trec_trec1,True,,,,,,,,,,,,,,,,,
+,,,trec_trec2,True,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_context_self_description,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_guess_question,,,,,True,True,,,,True,,,,,,,,
+,,,trivia_qa_rc_question_answer,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_question_with_instruction,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_reading_comprehension_1,,,,,,,,,,True,,,,,,,,
+,,,trivia_qa_rc_reading_comprehension_2,,,,,,,,,,True,,,,,,,,
+,,,web_questions_count_answers,,,,,,,,,True,,,,,,,,,
+,,,web_questions_credible_question,,,,,True,,,,,,,,,,,,,
+,,,web_questions_if_answers_what_question,,,,,True,,,,,,,,,,,,,
+,,,web_questions_potential_correct_answer,,,,,,,,,,,True,,,,,,,
+,,,web_questions_question_answer,,,,,,,,,,,,,,,,,,
+,,,web_questions_suggest_question,,,,,True,,,,,,,,,,,,,
+,,,wiki_bio_comprehension,,,,,,,,,,True,,,,,,,,
+,,,wiki_bio_guess_person,,,,,,,,,,True,,,,,,,,
+,,,wiki_bio_key_content,,,,,,,,,,True,,,,,,,,
+,,,wiki_bio_what_content,,,,,,,,,,True,,,,,,,,
+"should rephrase ""summarize""",,,wiki_bio_who,,,,,,,,,,,,,,,,,,
+,,,wiki_hop_original_Choose_Best_Object_Candidate,,,,,,,,,,True,,,,,,,,True
+,,,wiki_hop_original_Explain_Relation,,True,,,,,,,,True,,,,,,,,
+,,,wiki_hop_original_Generate_Fact_Triple,,,,,,,,,,True,,,,,,,,True
+,,,wiki_hop_original_Generate_Object_Answer,,,,,,,,,,True,,,,,,,,True
+,,,wiki_hop_original_Generate_Subject_Answer,,,,,,,,,,True,,,,,,,,True
+,,,wiki_hop_original_Indirect_Question_about_Birthplace_Citizenship_Place_of_Death,,,,,,,,,,,,,True,,,,,
+,,,wiqa_effect_with_label_answer,True,,,,,,,,,,,,,,,,,
+,,,wiqa_effect_with_string_answer,True,,,,,,,,,,,,,,,,,
+,,,wiqa_impacting_the_process,,,,True,,,,,,,,,,,,,,
+,,,wiqa_question_type,,,,,,,,,,True,,,,,,,,
+,,,wiqa_remove_first_step,,,,,,,,,,True,,,,,,,,
+,,,wiqa_remove_first_step_bis,,,,,,,,,,True,,,,,,,,
+,,,wiqa_remove_last_step,,,,,,,,,,True,,,,,,,,
+,,,wiqa_remove_last_step_bis,,,,,,,,,,True,,,,,,,,
+,True,,xsum_Document_,,,,,,,,,,,,,,,,,,
+,True,,xsum_Summarize_this_document_,,,,,,,,,,,,,,,,,,
+,True,,xsum_TLDR,,,,,,,,,,,,,,,,,,
+,True,,xsum_generate_summary_for_this,,,,,,,,,,,,,,,,,,
+,True,,xsum_summarize_,,,,,,,,,,,,,,True,,,,
+,True,,xsum_write_one_sentence,,,,,,,,,,,,,,,,,,
+,,,yelp_review_full_based_on_that,,True,,,,,,,,,,,,,,,,
+,,,yelp_review_full_format_rating,,True,,,,,,,,,,,,,,,,
+,,,yelp_review_full_format_score,,True,,,,,,,,,,,,,,,,
+,,,yelp_review_full_format_star,,True,,,,,,,,,,,,,,,,
+,,,yelp_review_full_on_a_scale,,True,,,,,,,,,,,,,,,,
+,,,yelp_review_full_so_i_would,,True,,,,,,,,,,,,,,,,
+,,,yelp_review_full_this_place,,True,,,,,,,,,,,,,,,,
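The table above is a flat annotation matrix: one row per prompt (keyed by the `dataset_subset_template` column), with the remaining columns acting as quality flags that are either True or empty. A hedged sketch for querying it with pandas (assumes the repository root as the working directory):

    # Sketch: list the templates flagged for evaluation (do_eval column set).
    import pandas as pd

    df = pd.read_csv("promptsource/seqio_tasks/dataset_subset_template.csv")
    flagged = df[df["do_eval"].notna()]["dataset_subset_template"]
    print(len(flagged), "templates flagged do_eval")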
promptsource/seqio_tasks/experiment_D4.csv ADDED
@@ -0,0 +1,242 @@
1
+ HF_name,subset,task_by_convention,format,comment,seed_paper,september_check,do_train,do_eval,train_size,adjusted_train_size,D3_do_train,D3_do_eval,D3_adjusted_train_size,metric,multiple correct answer,Paper link,non_linguistic_knowledge,skip,Imported Task Name,imported category,input_length,_human_skill,Domain,Reference
2
+ crows_pairs,,bias_and_fairness,,test set only; authors themselves acknowledge some problems,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
3
+ jigsaw_toxicity_pred,,bias_and_fairness,,current https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/data ; want https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
4
+ super_glue,axg,bias_and_fairness,cls,test set only,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
5
+ winogender,,bias_and_fairness,cls,also as axg in super_glue,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
6
+ wino_bias,type1_anti,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
7
+ wino_bias,type2_anti,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
8
+ wino_bias,type1_pro,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
9
+ wino_bias,type2_pro,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
10
+ super_glue,wsc.fixed,coreference,cls,,,,,TRUE,554,0,TRUE,TRUE,554,accuracy,,https://arxiv.org/pdf/1905.00537.pdf,,,superglue-wsc,cls/other,single sentence,knowledge-? reading comprehension,,Levesque et al. 2012
11
+ winograd_wsc,wsc273,coreference,ext,,GPT,,,TRUE,0,0,,,0,accuracy,,https://www.aaai.org/ocs/index.php/KR/KR12/paper/download/4492/4924,,,,,,,,Levesque et al. 2012
12
+ winogrande,winogrande_xl,coreference,ext,,GPT,TRUE,,TRUE,40398,0,,,0,accuracy,,https://arxiv.org/pdf/1907.10641.pdf,,,WinoGrande,qa/multiple-choice qa,,knowledge-? reading comprehension,,Sakaguchi et al. 2020
13
+ winogrande,winogrande_debiased,coreference,ext,"""debiased"" = adversarially filtered",GPT,TRUE,,TRUE,9248,0,,,0,accuracy,,https://arxiv.org/pdf/1907.10641.pdf,,,WinoGrande,qa/multiple-choice qa,,knowledge-? reading comprehension,,Sakaguchi et al. 2020
14
+ glue,cola,grammatical_acceptability,cls,includes semantic acceptability too; to be replaced by blimp,,,,TRUE,8551,0,,TRUE,0,accuracy;matthews_corrcoef,,https://arxiv.org/pdf/1805.12471.pdf,,,glue-cola,cls/other,single sentence,,,Warstadt et al. 2019
15
+ super_glue,cb,NLI,cls,"""for multi-class F1 we compute the unweighted average of the F1 per class.""",,TRUE,,TRUE,250,0,,TRUE,0,mean_multiclass_f1;accuracy,,https://semanticsarchive.net/Archive/Tg3ZGI2M/Marneffe.pdf,,,superglue-cb,cls/nli,sentence pair,knowledge-neutral inference,,de Marneffe et al. 2019
16
+ super_glue,rte,NLI,cls,,,TRUE,,TRUE,2490,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1905.00537.pdf,,,superglue-rte,cls/nli,sentence pair,knowledge modest inference,,Dagan et al. 2005; Bar-Haim et al. 2006 Giampiccolo et al. 2007; Bentivogli et al. 2009
17
+ anli,,NLI,cls,"In addition to accuracy, paper also evaluates on range of relaxed/strict and matched/unmatched settings and reports F scores for different answers",,,,TRUE,162865,0,,TRUE,0,accuracy,,https://arxiv.org/abs/1910.14599,,,anli,cls/nli,sentence pair,knowledge modest inference,,Nie et al. 2020
18
+ hans,,NLI,cls,,,TRUE,,TRUE,0,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1902.01007.pdf,,,,,sentence pair,syntax?,,McCoy et al. 2019
19
+ super_glue,axb,NLI,cls,test set only,,TRUE,,TRUE,0,0,,,,,,,,,,,,,,
20
+ glue,mrpc,paraphrase,cls,,,,TRUE,TRUE,3668,3668,TRUE,TRUE,3668,accuracy;f1_score,,https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/I05-50025B15D.pdf,,,glue-mrpc,cls/paraphrase,,paraphrase,,Dolan and Brockett 2005
21
+ glue,qqp,paraphrase,cls,,,,TRUE,TRUE,363846,363846,TRUE,,363846,accuracy;f1_score,,https://aclanthology.org/I05-5002.pdf,,,glue-qqp,cls/paraphrase,,,,(link)
22
+ paws,labeled_final,paraphrase,cls,,,,TRUE,,49401,49401,TRUE,,49401,,,,,,paws,cls/paraphrase,,,,Zhang et al. 2019
23
+ ai2_arc,ARC-Challenge,QA_closed_book,cls,,GPT,,,TRUE,1119,0,TRUE,,1119,"accuracy_with_tie: For each question, a system receives 1 point if it
24
+ chooses the correct answer and 1/k if it reports a k-way tie
25
+ (i.e., chooses multiple answers) that includes the correct answer.",,https://arxiv.org/pdf/1803.05457.pdf,mid-intensive,,ARC (chal.),qa/multiple-choice qa,,nontrivial_comprehension,,Clark et al. 2018
26
+ ai2_arc,ARC-Easy,QA_closed_book,cls,,GPT,,,TRUE,2251,0,TRUE,,2251,"accuracy_with_tie: For each question, a system receives 1 point if it
27
+ chooses the correct answer and 1/k if it reports a k-way tie
28
+ (i.e., chooses multiple answers) that includes the correct answer.",,https://arxiv.org/pdf/1803.05457.pdf,mid-intensive,,ARC (easy),Multiple choice,,,,
29
+ nq_open,,QA_closed_book,gen,,GPT,TRUE,,TRUE,87925,0,,TRUE,0,kilt-exact_match;average_accuracy_across_answers,TRUE,https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00276/43518/Natural-Questions-A-Benchmark-for-Question,intensive,,Natural Questions (open domain),,,trivia,,
30
+ kilt_tasks,hotpotqa,QA_closed_book,gen,recast as closed-book due to input length,self,,TRUE,,88869,88869,,,,,,,,,kilt hotpotqa,qa/closed-book qa,,encyclopedia; multi-hop QA,,Yang et al. 2018
31
+ trivia_qa,unfiltered,QA_closed_book,gen,,GPT,TRUE,,TRUE,87622,0,TRUE,,87622,exact_match;f1_over_words => wikipedia aliases are considered valid answers,TRUE,https://arxiv.org/pdf/1705.03551.pdf,intensive,,Trivia QA,,,,,
32
+ web_questions,,QA_closed_book,gen,"""supposed to be answerable by Freebase"" Check corpora deduplication with freebaseqa.",GPT,,,TRUE,3778,0,TRUE,,3778,accuracy : they don't mention how they normalize across multiple correct answers,TRUE,https://aclanthology.org/D13-1160.pdf,intensive,,web questions,qa/closed-book qa,,,,Berant et al. 2013
33
+ wiki_qa,,QA_closed_book,cls,,CrossFit,,TRUE,,20360,20360,,,,,,https://aclanthology.org/D15-1237.pdf,,,wiki qa,cls/other,,,,Yang et al. 2015
34
+ adversarial_qa,dbidaf,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,https://aclanthology.org/2020.tacl-1.43/,,,adversarialqa,qa/machine reading comprehension,,,,Bartolo et al. 2020
35
+ adversarial_qa,dbert,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,,,,,,,,,
36
+ adversarial_qa,droberta,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,,,,,,,,,
37
+ coqa,,QA_extractive,ext,GPT-easy,GPT,,,TRUE,7199,,,,,"macro_average_f1: for computing a model’s performance, each individual prediction is compared
38
+ against n human answers resulting in n F1 scores,
39
+ the maximum of which is chosen as the prediction’s
40
+ F1. For each question, we average out F1 across
41
+ these n sets, both for humans and models. In our
42
+ final evaluation, we use n = 4 human answers for
43
+ every question (the original answer and 3 additionally collected answers). The articles a, an and the
44
+ and punctuation marks are excluded in evaluation.",from the paper it seems it could contain multiple answers but the dataset has only one answer per question,https://arxiv.org/pdf/1808.07042.pdf,,,,,,,,
45
+ duorc,SelfRC,QA_extractive,ext,,TaskEmbed;CrossFit,,TRUE,,60721,60721,,,,,,https://duorc.github.io/,,,DuoRC,qa/machine reading comprehension,,,Wikipedia/IMDB crowd,Saha et al. 2018
46
+ duorc,ParaphraseRC,QA_extractive,ext,,TaskEmbed;CrossFit,,TRUE,,69524,69524,,,,,,https://arxiv.org/pdf/1804.07927.pdf,,,DuoRC,paraphrased QA,,,,Saha et al. 2018
47
+ ropes,,QA_extractive,ext,,,TRUE,TRUE,,10924,10924,TRUE,,10924,,,,modest,,ropes,Extractive QA,,cause_and_effect;nontrivial_comprehension,,Lin et al. 2019
48
+ squad_v2,,QA_extractive,ext,,GPT,,,TRUE,130319,0,TRUE,,130319,exact_match;f1_score,TRUE,https://arxiv.org/pdf/1806.03822.pdf,,,SQuAD 2.0,Extractive QA,,,,Rajpurkar et al. 2018
49
+ super_glue,record,QA_extractive,ext,,,TRUE,,TRUE,100730,0,TRUE,TRUE,100730,max_token_level_f1;exact_match,TRUE,https://arxiv.org/pdf/1810.12885.pdf,,,superglue-record,qa/machine reading comprehension,,knowledge-? reading comprehension,,Zhang et al. 2018
50
+ qa_srl,,QA_extractive,ext,"need non-naive metric (""If the predicted word is contained inside the annotated answer span it is considered a correct prediction.""); v2 not in HF https://aclanthology.org/P18-1191.pdf",Eval WG,,,TRUE,6414,0,TRUE,TRUE,6414,accuracy,TRUE,https://dada.cs.washington.edu/qasrl/#page-top,neutral,,qa srl,other,,semantic role,,He et al. 2015
51
+ quac,,QA_extractive,ext,,GPT,,,TRUE,11567,,,,,"average_maximum_f1;HEQ-Q;HEQ-D: To make oracle human and system performance comparable,
52
+ given n references, we report the average of the
53
+ maximum F1 computed from each n − 1 subset
54
+ with respect to the heldout reference.",TRUE,https://arxiv.org/pdf/1808.07036.pdf,,,,,,dialogue,,
55
+ quoref,,QA_extractive,ext,,,TRUE,TRUE,,19399,19399,TRUE,,19399,,,https://aclanthology.org/D19-1606.pdf,,,Quoref,Extractive QA,,,,Dasigi et al. 2019
56
+ tydiqa,,QA_extractive,ext,,Eval WG,,TRUE,,9211,9211,,,,,,,,,,,,,,
57
+ drop,,QA_generative,gen,"nontrivial math; try history_690, it's pretty hard even when I have domain knowledge",GPT,TRUE,,TRUE,,,,,,exact_match; macro_average_f1,TRUE,https://aclanthology.org/N19-1246.pdf,,,DROP ,multi-hop quantitative reasoning; Abstractive QA,,numerical,Wikipedia crowd,Dua et al. 2019
58
+ cos_e,v1.11,QA_multiple_choice,cls,"same as commonsense_qa but with (poorly sourced) human explanations; questionable ""commonsense""; lots of world knowledge",Vania,TRUE,TRUE,,9741,9741,TRUE,,9741,,,,,,cos e,other/generate explanation,,,,Rajani et al. 2019
59
+ cosmos_qa,,QA_multiple_choice,cls,,,TRUE,TRUE,,25262,25262,TRUE,,25262,,,,,,cosmos qa,qa/multiple-choice qa,,,,Huang et al. 2019
60
+ dream,,QA_multiple_choice,cls,,,TRUE,TRUE,,6116,6116,TRUE,,6116,,,,,,dream,qa/multiple-choice qa,,,,Sun et al. 2019
61
+ openbookqa,main,QA_multiple_choice,cls,interesting combo of pragmatics + scientific reasoning,GPT,,,TRUE,4957,0,TRUE,TRUE,4957,"accuracy_with_tie : For each question, a system receives 1 point if it
62
+ chooses the correct answer and 1/k if it reports a k-way tie
63
+ (i.e., chooses multiple answers) that includes the correct answer.",,https://aclanthology.org/D18-1260.pdf,modest,,openbookqa,qa/multiple-choice qa,,pragmatics,,Mihaylov et al. 2018
64
+ qasc,,QA_multiple_choice,cls,,,TRUE,TRUE,,8134,8134,TRUE,,8134,,,,given?,,qasc,qa/multiple-choice qa,,,,Khot et al. 2020
65
+ quail,,QA_multiple_choice,cls,,,TRUE,TRUE,,10246,10246,TRUE,,10246,,,,,,quail,qa/multiple-choice qa,,,,Rogers et al. 2020
66
+ quarel,,QA_multiple_choice,cls,,CrossFit,,TRUE,,1941,1941,,,,,,,,,quarel,qa/multiple-choice qa,,logical form,,Tafjord et al. 2019a
67
+ quartz,,QA_multiple_choice,cls,,,TRUE,TRUE,,2696,2696,TRUE,,2696,,,https://aclanthology.org/D19-1608.pdf,given?,,quartz-with knowledge,qa/multiple-choice qa,,,,Tafjord et al. 2019b
68
+ race,high,QA_multiple_choice,cls,GPT-hard,GPT,,,TRUE,62445,0,TRUE,TRUE,62445,accuracy,,https://arxiv.org/pdf/1704.04683.pdf,neutral,,race-high,qa/multiple-choice qa,,knowledge-neutral reading comprehension,,Lai et al. 2017
69
+ race,middle,QA_multiple_choice,cls,"revisit: define as comprehension, paragraph level?",GPT,,,TRUE,25421,0,TRUE,TRUE,25421,accuracy,,https://arxiv.org/pdf/1704.04683.pdf,neutral,,race-middle,qa/multiple-choice qa,,knowledge-neutral reading comprehension,,Lai et al. 2017
70
+ sciq,,QA_multiple_choice,cls,,,TRUE,TRUE,,11679,11679,TRUE,,11679,,,,,,sciq,qa/multiple-choice qa,,,,Welbl et al. 2017
71
+ social_i_qa,,QA_multiple_choice,cls,metric differ by prompt: 4-way classification cast as binary ,,TRUE,TRUE,TRUE,33410,33410,TRUE,TRUE,33410,accuracy,,https://arxiv.org/pdf/1904.09728.pdf,,,SIQA,qa/multiple-choice qa,,cultural knowledge,,Sap et al. 2019
72
+ super_glue,boolq,QA_multiple_choice,cls,,,TRUE,,TRUE,9427,0,TRUE,TRUE,9427,accuracy,,https://arxiv.org/pdf/1905.10044.pdf,neutral?,,superglue-boolq,,,knowledge-? reading comprehension,,
73
+ super_glue,copa,QA_multiple_choice,cls,,,TRUE,,TRUE,400,0,TRUE,TRUE,400,accuracy,,http://commonsensereasoning.org/2011/papers/Roemmele.pdf,modest,,superglue-copa,qa/multiple-choice qa,,causal cognition,,Gordon et al. 2012
74
+ super_glue,multirc,QA_multiple_choice,cls,F1 over all answer options. See paper p. 259 for definition,,TRUE,,TRUE,27243,0,TRUE,TRUE,27243,f1_over_all_options;exact_match,,https://aclanthology.org/N18-1023.pdf,neutral?,,superglue-multirc,qa/multiple-choice qa,,knowledge-? reading comprehension,,Khashabi et al. 2018
75
+ wiki_hop,original,QA_multiple_choice,cls,,,TRUE,TRUE,,43738,43738,TRUE,,43738,,,https://transacl.org/ojs/index.php/tacl/article/viewFile/1325/299,,,WikiHop (Welbl et al. 2018),multi-hop QA,,,Wikipedia KB,
76
+ wiqa,,QA_multiple_choice,cls,,,TRUE,TRUE,,29808,29808,TRUE,,29808,,,,,,wiqa,qa/multiple-choice qa,,cause_and_effect,,Tandon et al. 2019
77
+ circa,,QA_multiple_choice,cls,revisit: problematic prompts,,,,TRUE,34268,0,,TRUE,0,mean_multiclass_f1;accuracy,,https://arxiv.org/pdf/2010.03450.pdf,,,circa,cls/other,,pragmatics,,Louis et al. 2020
78
+ mc_taco,,QA_multiple_choice,cls,no train set; variable number of answer_choices; eval in paper is over set of possible candidates;,,,,TRUE,0,0,,TRUE,0,exact_match; f1_score,,https://arxiv.org/pdf/1909.03065.pdf,,,mc taco,qa/binary,,temporal cognition,,Zhou et al. 2019
79
+ piqa,,QA_multiple_choice,cls,revisit: not just other,GPT,,,TRUE,16113,0,TRUE,,16113,accuracy,,https://arxiv.org/pdf/1911.11641.pdf,,,PIQA,Multiple choice,,physical_cognition,,Bisk et al. 2020
80
+ amazon_polarity,,sentiment,cls,,,TRUE,TRUE,,3600000,500000,TRUE,,500000,,,https://cs.stanford.edu/people/jure/pubs/reviews-recsys13.pdf,,,amazon polarity,cls/sentiment analysis,,,,McAuley and Leskovec 2013
81
+ app_reviews,,sentiment,cls,,,TRUE,TRUE,,288065,288065,TRUE,,288065,,,,,,app reviews,other/regression,,,,Missing
82
+ imdb,,sentiment,cls,,,TRUE,TRUE,,25000,25000,TRUE,,25000,,,,,,imdb,cls/sentiment analysis,,no dev set,,Maas et al. 2011
83
+ rotten_tomatoes,,sentiment,cls,,,TRUE,TRUE,,8530,8530,TRUE,,8530,,,,,,rotten tomatoes,cls/sentiment analysis,,,,Pang and Lee 2005
84
+ yelp_review_full,,sentiment,cls,no dev set,,TRUE,TRUE,,650000,500000,TRUE,,500000,,,,,,yelp review full,other/regression,,,,Zhang et al. 2015; (link)
85
+ lambada,,story_completion,gen,revisit: story or cloze or coref? trivial cloze prompt; training set is just unlabeled corpora; GPT task,GPT,,,TRUE,0,0,,TRUE,0,accuracy;perplexity;median_rank,,https://arxiv.org/pdf/1606.06031.pdf,,,,,,,,
86
+ craffel/openai_lambada,,story_completion,gen,revisit: story or cloze or coref? trivial cloze prompt; training set is just unlabeled corpora; GPT task,GPT,,,TRUE,0,0,,TRUE,0,accuracy;perplexity;median_rank,,https://arxiv.org/pdf/1606.06031.pdf,,,,,,,,
87
+ story_cloze,2016,story_completion,cls,todo: custom loading; swag like?,GPT,,,TRUE,,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1604.01696.pdf,,,,,,,,
88
+ hellaswag,,story_completion,cls,,GPT,,,TRUE,39905,0,TRUE,,39905,accuracy,,https://arxiv.org/pdf/1905.07830.pdf,,,hellaswag,qa/multiple-choice qa,,,,Zellers et al. 2019
89
+ common_gen,,structure_to_text,gen,,,TRUE,TRUE,,67389,67389,TRUE,,67389,,,,,,common gen,other,,,,Lin et al. 2020b
90
+ wiki_bio,,structure_to_text,gen,,,TRUE,TRUE,,582659,500000,TRUE,,500000,,,,,,wiki bio,cg/other,,,,Lebret et al. 2016
91
+ cnn_dailymail,3.0.0,summarization,gen,,,TRUE,TRUE,,287113,287113,TRUE,,287113,,,,,,,,,,,
92
+ gigaword,,summarization,gen,,,TRUE,TRUE,,3803957,500000,TRUE,,500000,,,,,,gigaword,cg/summarization,,,,Napoles et al. 2012
93
+ multi_news,,summarization,gen,,CrossFit,,TRUE,,44972,44972,,,,,,,,,multi news,cg/summarization,,,,Fabbri et al. 2019
94
+ samsum,,summarization,gen,,CrossFit,,TRUE,,14732,14732,,,,,,,,,samsum,cg/summarization,,,,Gliwa et al. 2019
95
+ xsum,,summarization,gen,,,TRUE,TRUE,TRUE,204045,204045,TRUE,TRUE,204045,rouge,,https://arxiv.org/pdf/1808.08745.pdf,,,xsum,cg/summarization,,,,Narayan et al. 2018
96
+ ag_news,,topic_classification,cls,,,TRUE,TRUE,,120000,120000,TRUE,,120000,,,http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html,,,ag news,cls/topic,,,,Gulli (link)
97
+ dbpedia_14,,topic_classification,cls,,,TRUE,TRUE,,560000,500000,TRUE,,500000,,,https://svn.aksw.org/papers/2013/SWJ_DBpedia/public.pdf,,,dbpedia 14,cls/topic,,,,Lehmann et al. 2015
98
+ trec,,topic_classification,cls,,,TRUE,TRUE,,5452,5452,TRUE,,5452,,,https://trec.nist.gov/data/qa.html,,,trec,cls/other,,,,Li and Roth 2002; Hovy et al. 2001
99
+ super_glue,wic,word_sense_disambiguation,cls,,,TRUE,,TRUE,5428,0,TRUE,TRUE,5428,accuracy,,https://arxiv.org/pdf/1808.09121.pdf,,,superglue-wic,cls/other,,lexical_knowledge,,Pilehvar and Camacho-Collados 2019
100
+ Staging Area,,,,,,,,,,,,,,,,,,,,,,,,
101
+ Would include but not in HF or other practical limitations,,,,,,,,,,,,,,,,,,,,,,,,
102
+ definite_pronoun_resolution,,coreference,,todo: download error,,,,,,,,,,,,,,,definite pronoun resolution,other,,,,Rahman and Ng 2012
103
+ jeopardy,,closed-book qa,gen,sporadic download error,CrossFit,,,,,,,,,,,,,promptsource download error,jeopardy,qa/closed-book qa,,,,(link)
104
+ blimp,,,cls,no prompts yet; collapse subsets,,,,,,0,,,0,,,,,,,,,,,
105
+ Hendrycks et al. 2021,,,,https://arxiv.org/abs/2009.03300v3,,,,,,,,,,,,,,,,,,,,
106
+ Multi-Turn Dialogue Reasoning,,,,https://aclanthology.org/2020.acl-main.130.pdf,Vania,,,,7088,,,,,,,,,,,,,,,
107
+ Argument Reasoning Comprehension Task,,,,https://aclanthology.org/N18-1175.pdf,Vania,,,,1211,,,,,,,,,,,,,,,
108
+ MCScript,,,,https://aclanthology.org/L18-1564.pdf,Vania,,,,14191,,,,,,,,,,,,,,,
109
+ narrativeqa,,,,very long input sequence,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,NarQA,Abstractive QA,,,,
110
+ newsqa,,,,download error,TaskEmbed,,,,,,,,,,,,,promptsource download error,NewsQA,Extractive QA,,,,Trischler et al. 2017
111
+ eli5,,,,dataset split error,CrossFit,,,,,,,,,,,https://facebookresearch.github.io/ELI5/explore.html,,skip: HF datasets error; the split field is used for subsets,eli5-askh,qa/long-form qa,,possibly knowledge-neutral,,Fan et al. 2019
112
+ Maybe Reconsider,,,,,,,,,,,,,,,,,,,,,,,,
113
+ zest,,,,its original task is quite complex (need to provide a decision function); should be held-out eval only,self,,,,,,,,,,,,,,,,,,,
114
+ swag,,story_completion,cls,revisit whether this should be considered as a variant of NLI,,,,,73546,0,TRUE,,73546,,,,,,swag,qa/multiple-choice qa,,,,Zellers et al. 2018
115
+ codah,codah,story_completion,cls,a variant of swag; revisit whether this should be considered as a variant of NLI,,,,,2776,0,TRUE,,2776,,,,,,codah,qa/multiple-choice qa,,,,Chen et al. 2019
116
+ wiki_auto,,,,revisit: lots of duplicate simplified text; novel generative task could be very challenging,CrossFit,,,,,,,,,,,,,no prompt yet,wiki auto,cls/other,,text simplification,,Jiang et al. 2020
117
+ proto_qa,,,gen,"generate prototypical concepts, kinda niche format with multiple correct answers",CrossFit,,,,,,,,,,,,,no prompt yet,proto qa,other,,,,Boratko et al. 2020
118
+ empathetic_dialogues,,,,generation? classification?,CrossFit,,,,,,,,,,,https://arxiv.org/pdf/1811.00207.pdf,,no prompt yet,empathetic dialogues,cg/dialogue,,,,Rashkin et al. 2019
119
+ qed,,,,uses held-out Natural Questions,,,,,,,,,,,,,,,,,,,,
120
+ kilt_tasks,aidayago2,,,,,,,,,,,,,,,,,no prompt yet,kilt ay2,other/entity linking,,encyclopedia,,Hoffart et al. 2011
121
+ kilt_tasks,wow,,,,,,,,,,,,,,,,,no prompt yet,kilt wow,cg/dialogue,,encyclopedia,,Dinan et al. 2019
122
+ lama,conceptnet,,,,,,,,,,,,,,,,,no prompt yet,lama-conceptnet,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
123
+ lama,google_re,,,,,,,,,,,,,,,,,no prompt yet,lama-google re,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
124
+ lama,squad,,,,,,,,,,,,,,,,,no prompt yet,lama-squad,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
125
+ lama,trex,,,,,,,,,,,,,,,,,no prompt yet,lama-trex,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
126
+ limit,,physical cognition,,,,,,,,,,,,,,https://aclanthology.org/2020.findings-emnlp.88.pdf,,label errors in dataset itself? also no validation set; otherwise well motivated by semantic theories,limit,other,,physical semantic repr.,,Manotas et al. 2020
127
+ kilt_tasks,fever,,,revisit whether this should be considered as a variant of NLI,,,,,,,,,,,,,,temporary skip: prompts available in non-benchmark standalone dataset,kilt fever,cls/fact checking,,encyclopedia,,Thorne et al. 2018
128
+ Skipped,,,,,,,,,,,,,,,,,,,,,,,,
129
+ fever,v2.0,closed-book qa/fact checking,,also in KILT,,,,,,,,,,,,,,skip: awkward prompts as closed-book qa,FEVER,,,,,
130
+ hotpot_qa,distractor,,,also in KILT,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,Hotpot QA,,,,,
131
+ hotpot_qa,fullwiki,,,also in KILT,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,Hotpot QA,,,,,
132
+ emo,,sentiment,cls,skip: offensive and ungrammatical text,,merged,,,30160,0,TRUE,TRUE,30160,precision;recall;F1,,https://aclanthology.org/S19-2005.pdf,,skip: offensive and ungrammatical text,emo,cls/emotion,,,,Chatterjee et al. 2019
133
+ freebase_qa,,QA_closed_book,gen,"need to be held out because web_questions is ""supposed to be answerable by Freebase""",,,,,20358,0,TRUE,,20358,,,,intensive,,freebase qa,qa/closed-book qa,,,,Jiang et al. 2019
134
+ aqua_rat,,,,,,,,,,,,,,,,https://arxiv.org/abs/1705.04146,,skip: nontrivial math,aqua rat,qa/multiple-choice qa,,nontrivial math,,Ling et al. 2017
135
+ math_qa,,,,,,,,,,,,,,,,,,skip: nontrivial math,math qa,qa/multiple-choice qa,,nontrivial math,,Amini et al. 2019
136
+ numer_sense,,,,,,,,,,,,,,,,,,skip: closed-book trivia ,numer sense,qa/closed-book qa,,numerical knowledge,,Lin et al. 2020a
137
+ squad_adversarial,,,,,,,,,,,,,,,,,,validation set only,,,,,,
138
+ squadshifts,,,,,,,,,,,,,,,,,,test set only,,,,,,
139
+ sms_spam,,,,,,,,,,,,,,,,,,skip: unclean corpus and likely harmful content,sms spam,cls/other,,,,Almeida et al. 2011
140
+ search_qa,,,,,,,,,,,,,,,,,,skip: seems like a very unclean corpus,search qa,qa/closed-book qa,,,,Dunn et al. 2017
141
+ kilt_tasks,trex,,,,,,,,,,,,,,,,,skip: non-natural language,kilt trex,qa/closed-book qa,,encyclopedia,,Elsahar et al. 2018
142
+ kilt_tasks,structured_zeroshot,,,,,,,,,,,,,,,,,skip: non-natural language,kilt zsre,qa/closed-book qa,,encyclopedia,,Levy et al. 2017
143
+ spider,,,,,,,,,,,,,,,,,,skip: non-natural language,spider,cg/other,,,,Yu et al. 2018
144
+ wikisql,,,,,,,,,,,,,,,,,,skip: non-natural language,wikisql,cg/other,,,,Zhong et al. 2017
145
+ com_qa,,,,,CrossFit,,,,,,,,,,,https://arxiv.org/pdf/1809.09528.pdf,,skip: non-human language: URL,ComQA (Abujabal et al. 2019),factoid QA w/ paraphrases,,,snippets WikiAnswers,
146
+ climate_fever,,,,revisit whether this should be considered as a variant of NLI,,,,,,,,,,,,,,skip: no train set,climate fever,cls/fact checking,,,,Diggelmann et al. 2020
147
+ art,,,,,,,,,,,,,,,,https://arxiv.org/pdf/1908.05739.pdf,,skip: NLI reserved for generalization studies (although this one is not a traditionally defined NLI),art (abductive nli),other,,,,Bhagavatula et al. 2020
148
+ glue,mnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-mnli,cls/nli,,,,Williams et al. 2018
149
+ glue,qnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-qnli,cls/nli,,,,Rajpurkar et al. 2016
150
+ glue,rte,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-rte,cls/nli,,,,Dagan et al. 2005; Bar-Haim et al. 2006 Giampiccolo et al. 2007; Bentivogli et al. 2009
151
+ glue,wnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-wnli,cls/nli,,,,Levesque et al. 2012
152
+ ,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,scitail,cls/nli,,,,Khot et al. 2018
153
+ ,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,sick,cls/nli,,,,Marelli et al. 2014
154
+ ,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,SNLI (Bowman et al. 2015),NLI,,,misc.,
155
+ aeslc,,,,summarization by email subject line,,,,,,,,,,,,https://arxiv.org/abs/1906.03497,,skip: niche task,aeslc,cg/summarization,,generation,,Zhang and Tetreault 2019
156
+ onestop_english,,,,,,,,,,,,,,,,https://aclanthology.org/W18-0535.pdf,,skip: niche task: classify curriculum difficulty,onestop english,cls/other,,,,Vajjala and Lučić 2018
157
+ mocha,,,,,,,,,,,,,,,,,,skip: model generated text,mocha,other/regression,,,,Chen et al. 2020a
158
+ commonsense_qa,,,,duplicate with cos_e,Vania,,,,9741,,,,,,,https://arxiv.org/pdf/1811.00937.pdf,,,Commonsense QA,qa/multiple-choice qa,,,,Talmor et al. 2019
159
+ ,,,,,,,,,,,,,,,,,,skip: maybe harmful content from Twitter,emotion,cls/emotion,,,,Saravia et al. 2018
160
+ ,,,,the authors themselves seem to have renounced their own work,,,,,,,,,,,,https://github.com/nyu-mll/crows-pairs,,skip: harmful content,crows pairs,other,,,,Nangia et al. 2020
161
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-directed vs generalized,cls/hate speech detection,,,,Mollas et al. 2020
162
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-disability,cls/hate speech detection,,,,Mollas et al. 2020
163
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-gender,cls/hate speech detection,,,,Mollas et al. 2020
164
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-national origin,cls/hate speech detection,,,,Mollas et al. 2020
165
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-race,cls/hate speech detection,,,,Mollas et al. 2020
166
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-religion,cls/hate speech detection,,,,Mollas et al. 2020
167
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-sexual orientation,cls/hate speech detection,,,,Mollas et al. 2020
168
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,hate speech offensive,cls/hate speech detection,,,,Davidson et al. 2017
169
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,hate speech18,cls/hate speech detection,,,,de Gibert et al. 2018
170
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,hatexplain,cls/hate speech detection,,,,Mathew et al. 2020
171
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,reddit tifu-title,cg/summarization,,,,Kim et al. 2019
172
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,reddit tifu-tldr,cg/summarization,,,,Kim et al. 2019
173
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-emoji,cls/emotion,,,,Barbieri et al. 2020
174
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-emotion,cls/emotion,,,,Barbieri et al. 2020
175
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-hate,cls/emotion,,,,Barbieri et al. 2020
176
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-irony,cls/emotion,,,,Barbieri et al. 2020
177
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-offensive,cls/emotion,,,,Barbieri et al. 2020
178
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-sentiment,cls/emotion,,,,Barbieri et al. 2020
179
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance abortion,cls/emotion,,,,Barbieri et al. 2020
180
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance atheism,cls/emotion,,,,Barbieri et al. 2020
181
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance climate,cls/emotion,,,,Barbieri et al. 2020
182
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance feminist,cls/emotion,,,,Barbieri et al. 2020
183
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance hillary,cls/emotion,,,,Barbieri et al. 2020
184
+ ,,,,,,,,,,,,,,,,,,skip: harmful content,tweet qa,qa/machine reading comprehension,,,,Xiong et al. 2019
185
+ yelp_polarity,,,,,,,,,,,,,,,,,,skip: duplicate with yelp_review_full,yelp polarity,cls/sentiment analysis,,,,Zhang et al. 2015; (link)
186
+ quora,,,,,,,,,,,,,,,,https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs,,skip: duplicate under GLUE,QQP,paraphrase identification,,,social QA,Iyer et al. 2017
187
+ squad,,,,,,,,,,,,,,,,,,skip: duplicate under Squad 2.0,SQuAD 1.1,Extractive QA,,,,
188
+ yahoo_answers_topics,,,,,,,,,,,,,,,,,,skip for early experiments: unclean corpus,yahoo answers topics,cls/topic,,,,(link)
189
+ tab_fact,,,,,,,,,,,,,,,,,,skip for early experiments: tabular data,tab fact,cls/fact checking,,,,Chen et al. 2020b
190
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-anaphor gender agreement,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
191
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-anaphor number agreement,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
192
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-determiner noun agreement with adj irregular 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
193
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-ellipsis n bar 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
194
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-ellipsis n bar 2,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
195
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-existential there quantifiers 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
196
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-irregular past participle adjectives,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
197
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-sentential negation npi licensor present,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
198
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-sentential negation npi scope,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
199
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-wh questions object gap,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
200
+ poem_sentiment,,,,,,,,,,,,,,,,,,skip for early experiments: poetry domain,poem sentiment,cls/sentiment analysis,,creativity,,Sheng and Uthus 2020
201
+ acronym_identification,,,,,,,,,,,,,,,,https://arxiv.org/pdf/2010.14678.pdf,,skip for early experiments: niche/hard task,acronym identification,other,,,,Pouran Ben Veyseh et al. 2020
202
+ google_wellformed_query,,,,revisit whether to exclude fine-grain regression tasks,,,,,,,,,,,,,,skip for early experiments: niche/hard task,google wellformed query,cls/other,,,,Faruqui and Das 2018
203
+ liar,,,,revisit whether to exclude fine-grain regression tasks,,,,,,,,,,,,,,skip for early experiments: niche/hard task,liar,cls/fact checking,,,,Wang 2017
204
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: niche/hard task,break-QDMR-high-level,other,,semantic representation,,Wolfson et al. 2020
205
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: niche/hard task,crawl domain,other,,,,Zhang et al. 2020
206
+ discovery,discovery,,,,,,,,,,,,,,,,,skip for early experiments: niche task; no canonical answer,discovery,cls/other,,generative-ish,,Sileo et al. 2019
207
+ wiki_split,,,,,,,,,,,,,,,,,,skip for early experiments: niche task,wiki split,cg/other,,,,Botha et al. 2018
208
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: multilingual,aslg pc12,other,,,,Othman and Jemni 2012
209
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,CCG (Hockenmaier and Steedman 2007),CCG supertagging,,syntax,Penn Treebank,
210
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Chunk (Tjong Kim Sang and Buchholz 2000),syntactic chunking,,syntax,Penn Treebank,
211
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Conj (Ficler and Goldberg 2016),conjunct identification,,syntax,Penn Treebank,
212
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GED (Yannakoudakis et al. 2011),grammatical error detection,,syntax,misc.,
213
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GGParent (Liu et al. 2019a),syntactic tagging,,syntax,Penn Treebank,
214
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GParent (Liu et al. 2019a),syntactic tagging,,syntax,Penn Treebank,
215
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,NER (Tjong Kim Sang and De Meulder 2003),named entity recognition,,,news,
216
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Parent (Liu et al. 2019a),syntactic tagging,,syntax; constituency,Penn Treebank,
217
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,POS-EWT (Silveira et al. 2014),part-of-speech tagging,,syntax,Web Treebank,
218
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,POS-PTB (Marcus et al. 1993),part-of-speech tagging,,syntax,Penn Treebank,
219
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,ST (Bjerva et al. 2016),semantic tagging,,,Groningen Meaning Bank,
220
+ financial_phrasebank,,,,,,,,,,,,,,,,,,skip for early experiments: financial domain,financial phrasebank,cls/sentiment analysis,,,,Malo et al. 2014
221
+ health_fact,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,health fact,cls/fact checking,,,,Kotonya and Toni 2020
222
+ ,,,,,,,,,,,,,,,,http://www.sciencedirect.com/science/article/pii/S1532046412000615,,skip for early experiments: biomedical domain,ade corpus v2-classification,cls/other,,,,Gurulingappa et al. 2012
223
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,ade corpus v2-dosage,other/slot filling,,,,Gurulingappa et al. 2012
224
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,ade corpus v2-effect,other/slot filling,,,,Gurulingappa et al. 2012
225
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,biomrc,qa/machine reading comprehension,,,,Pappas et al. 2020
226
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,medical questions pairs,cls/paraphrase,,,,McCreery et al. 2020
227
+ scicite,,,,,,,,,,,,,,,,,,skip for early experiments: academic domain + niche/hard task,scicite,cls/other,,,,Cohan et al. 2019
228
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: abstract semantic representations,break-QDMR,other,,logical form,,Wolfson et al. 2020
229
+ ,,,,,,,,,,,,,,,,,,skip for early experiments: abstract semantic representations,e2e nlg cleaned,other,,,,Dušek et al. 2020 2019
230
+ glue,sst2,,,,,,,,,,,,,,,,,revisit: very short and often ill-formed movie reviews,glue-sst2,cls/sentiment analysis,,,,Socher et al. 2013
231
+ glue,stsb,fine-grain regression,,,,,,,,,,,,,,,,revisit whether to exclude fine-grain regression tasks,glue-stsb,semantic similarity,,,misc.,
232
+ ,,,,,,,,,,,,,,,,,,double check: subset missing from HF datasets,squad-no context,qa/closed-book qa,,,,Rajpurkar et al. 2016
233
+ ,,,,,,,,,,,,,,,,,,double check: subset missing from HF datasets,squad-with context,qa/machine reading comprehension,,,,Rajpurkar et al. 2016
234
+ ,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,BoolQ-CS,Binary yes/no,,,,
235
+ ,,,,,,,,,,,,,,,,https://aclanthology.org/C16-1236.pdf,,double check: missing from HF datasets,CQ (Bao et al. 2016),knowledge-based QA,,,snippets web queries/KB,
236
+ ,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,DROP-CS,Abstractive QA,,,,
237
+ ,,,,,,,,,,,,,,,,https://aclanthology.org/D13-1020.pdf,,double check: missing from HF datasets,MCTest,Multiple choice,,,,
238
+ ,,,,,,,,,,,,,,,,,,double check: missing from HF datasets,MRPC (Dolan and Brockett 2005),paraphrase identification,,,news,
239
+ ,,,,"""naturally perturbed"" version of BoolQ",,,,,,,,,,,,https://arxiv.org/pdf/2004.04849.pdf,,double check: missing from HF datasets,NP-BoolQ,Binary yes/no,,,,
240
+ ,,,,,,,,,,,,,,,,https://aclanthology.org/D19-1608.pdf,,double check: missing from HF datasets,quartz-no knowledge,qa/multiple-choice qa,,,,Tafjord et al. 2019b
241
+ ,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,Quoref-CS,Extractive QA,,,,
242
+ ,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,ROPES-CS,Extractive QA,,,,
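The selection columns above (skip, do_train, do_eval, D3_do_train) drive both scripts that follow, and the reading loop they share boils down to the sketch below: an empty subset cell becomes None to match the promptsource.Template convention, any note in skip desk-rejects the row, and the literal string TRUE routes a (dataset, subset) pair into a mixture.

    import csv

    train, evaluate = [], []
    with open("promptsource/seqio_tasks/experiment_D4.csv") as f:
        for row in csv.DictReader(f):
            if row["skip"]:  # any note in the skip column rejects the row
                continue
            pair = (row["HF_name"], row["subset"] or None)
            if row["do_train"] == "TRUE":
                train.append(pair)
            if row["do_eval"] == "TRUE":
                evaluate.append(pair)
    print(f"{len(train)} training sets, {len(evaluate)} evaluation sets")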
promptsource/seqio_tasks/preview_annotated_prompts.py ADDED
@@ -0,0 +1,111 @@
1
+ import csv
2
+ from pprint import pprint
3
+ from typing import Dict, List
4
+
5
+ import pkg_resources
6
+ from t5.data.glue_utils import get_glue_metric, get_super_glue_metric
7
+ from t5.evaluation.metrics import accuracy, mean_multiclass_f1, rouge
8
+
9
+
10
+ SAFE_EXCLUDE_CRITERIA = [
11
+ "template_bug",
12
+ "negated_answers",
13
+ "counting",
14
+ "answer_span_indices",
15
+ "non_natural_language",
16
+ "generative_non_true_implausible",
17
+ ]
18
+
19
+ AGGRESSIVE_EXCLUDE_CRITERIA = [
20
+ "generative_non_true_task",
21
+ "nontrivial_choices_hidden",
22
+ "awkward_phrasing",
23
+ "ungrammatical",
24
+ ] + SAFE_EXCLUDE_CRITERIA
25
+
26
+
27
+ NON_GLUE_METRICS = { # for those with do_eval = True
28
+ "anli": [accuracy],
29
+ "hans": [accuracy],
30
+ "circa_goldstandard1_judgement": [mean_multiclass_f1(num_classes=8), accuracy],
31
+ "circa_goldstandard2_judgement": [mean_multiclass_f1(num_classes=5), accuracy],
32
+ "mc_taco": [accuracy],
33
+ "nq_open": [accuracy],
34
+ "qa_srl": [accuracy],
35
+ "openbookqa": [accuracy],
36
+ "race": [accuracy],
37
+ "social_i_qa": [accuracy],
38
+ "emo": [mean_multiclass_f1(num_classes=4)],
39
+ "xsum": [rouge],
40
+ }
41
+
42
+
43
+ def exclude_bad_prompts(prompt: Dict) -> bool:
44
+ for criterion in SAFE_EXCLUDE_CRITERIA: # or AGGRESSIVE_EXCLUDE_CRITERIA
45
+ if prompt.get(criterion):
46
+ return False
47
+ return True
48
+
49
+
50
+ def load_annotated_prompts() -> List[Dict]:
51
+ annotated_csv_path = pkg_resources.resource_filename(__name__, "experiment_D3.csv")
52
+ with open(annotated_csv_path) as in_file:
53
+ reader = csv.DictReader(in_file)
54
+ all_tasks = [row for row in reader]
55
+
56
+ clean_tasks = list(filter(exclude_bad_prompts, all_tasks))
57
+
58
+ # Assign metrics
59
+ non_glue_eval_sets = list(NON_GLUE_METRICS.keys())
60
+ for task in clean_tasks:
61
+ if not task["do_eval"]:
62
+ continue
63
+
64
+ full_name = task["dataset_subset_template"]
65
+ if full_name.startswith("glue"):
66
+ subset = full_name.split("_")[1]
67
+ task["metrics"] = get_glue_metric(subset)
68
+ elif full_name.startswith("super_glue"):
69
+ subset = full_name.split("_")[2]
70
+ if subset in ("wsc.fixed", "multirc"):
71
+ # TODO: WSC and MultiRC need special pre/postprocessing
72
+ task["metrics"] = [accuracy]
73
+ continue
74
+ task["metrics"] = get_super_glue_metric(subset)
75
+
76
+ for dataset_name in non_glue_eval_sets:
77
+ if full_name.startswith(dataset_name):
78
+ task["metrics"] = NON_GLUE_METRICS[dataset_name]
79
+
80
+ # Skip rank_classification for now until we actually support it
81
+ # if task["nontrivial_choices_hidden"]:
82
+ # # Trick of plugging in answer options and rank LM probabilites as predictions.
83
+ # # Required for all prompts with non_trivial_choices_hidden,
84
+ # # but could be used for other tasks as well where answer choices are given.
85
+ # if "metrics" not in task:
86
+ # task["metrics"] = [rank_classification]
87
+ # elif rank_classification not in task["metrics"]:
88
+ # task["metrics"].append(rank_classification)
89
+
90
+ # should be already handled by NON_GLUE_METRICS
91
+ # if task['generative_true_task'] or task['generative_non_true_task']:
92
+ # task['metrics'] = rouge
93
+
94
+ return clean_tasks
95
+
96
+
97
+ def preview() -> None:
98
+ clean_tasks = load_annotated_prompts()
99
+
100
+ train_tasks = [t for t in clean_tasks if not t["skip_train"]]
101
+ eval_tasks = [t for t in clean_tasks if t["do_eval"]]
102
+
103
+ pprint([t["dataset_subset_template"] for t in train_tasks])
104
+ print(len(train_tasks))
105
+
106
+ pprint([f'{t["dataset_subset_template"]} {t["metrics"]}' for t in eval_tasks])
107
+ print(len(eval_tasks))
108
+
109
+
110
+ if __name__ == "__main__":
111
+ preview()
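Hypothetical usage of the loader above (it assumes the experiment_D3.csv referenced in load_annotated_prompts() is packaged next to this module):

    from promptsource.seqio_tasks.preview_annotated_prompts import load_annotated_prompts

    # Prompts flagged with any SAFE_EXCLUDE_CRITERIA entry have been dropped;
    # surviving rows carry a "metrics" key whenever do_eval is set.
    tasks = load_annotated_prompts()
    eval_tasks = [t for t in tasks if t["do_eval"]]
    print(len(tasks), "kept prompts,", len(eval_tasks), "with eval metrics")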
promptsource/seqio_tasks/preview_promptsource.py ADDED
@@ -0,0 +1,105 @@
1
+ import csv
2
+ from typing import List, Optional, Tuple
3
+
4
+ import pkg_resources
5
+
6
+ # from rich import inspect
7
+ from rich.pretty import pprint
8
+
9
+ from promptsource.templates import TemplateCollection
10
+
11
+
12
+ def preview() -> None:
13
+ experiment_path = pkg_resources.resource_filename(__name__, "experiment_D4.csv")
14
+ gsheet = {}
15
+ d4_train: List[Tuple[str, Optional[str]]] = []
16
+ d4_eval: List[Tuple[str, Optional[str]]] = []
17
+ d3_train_gpt: List[Tuple[str, Optional[str]]] = []
18
+ d3_train_sglue: List[Tuple[str, Optional[str]]] = []
19
+ experiment_path = pkg_resources.resource_filename(__name__, "experiment_D4.csv")
20
+ with open(experiment_path) as exp_file:
21
+ reader = csv.DictReader(exp_file)
22
+ for row in reader:
23
+ if row["skip"]:
24
+ continue
25
+ if row["subset"] == "":
26
+ row["subset"] = None # to match promptsource.Template object
27
+ dataset_subset = (row["HF_name"], row["subset"])
28
+ if row["do_train"] == "TRUE":
29
+ d4_train.append(dataset_subset)
30
+ if row["do_eval"] == "TRUE":
31
+ d4_eval.append(dataset_subset)
32
+ if row["D3_do_train"] == "TRUE" and "GPT" in row["seed_paper"]:
33
+ d3_train_gpt.append(dataset_subset)
34
+ if row["D3_do_train"] == "TRUE" and row["HF_name"] == "super_glue":
35
+ d3_train_sglue.append(dataset_subset)
36
+ gsheet[dataset_subset] = row
37
+ all_datasets = d4_train + d4_eval + d3_train_gpt + d3_train_sglue
38
+ print(f"Number of non-desk-rejected datasets = {len(all_datasets)}")
39
+ print(f"Number of training sets = {len(d4_train)}")
40
+ print(f"Number of evaluation sets = {len(d4_eval)}")
41
+
42
+ template_collection = TemplateCollection()
43
+ output = []
44
+ missing_og_flags = []
45
+ missing_metrics = []
46
+ for dataset_name, subset_name in template_collection.keys:
47
+ ds_name = (dataset_name, subset_name)
48
+ if ds_name not in d4_eval:
49
+ template_collection.remove(dataset_name, subset_name)
50
+ continue
51
+ OG = 0
52
+ non_OG = 0
53
+ dataset = template_collection.get_dataset(dataset_name, subset_name)
54
+ for template_name in dataset.all_template_names:
55
+ template = dataset[template_name]
56
+ # if dataset_name == 'ropes':
57
+ # inspect(template.metadata)
58
+ if not template.metadata.metrics:
59
+ missing_metrics.append(f"{dataset_name}/{subset_name}/{template_name}")
60
+
61
+ if template.metadata.original_task is True:
62
+ OG += 1
63
+ elif template.metadata.original_task is False:
64
+ non_OG += 1
65
+ elif template.metadata.original_task is None:
66
+ missing_og_flags.append(dataset_name + "/" + template_name)
67
+ continue
68
+
69
+ train_size = gsheet[ds_name]["train_size"]
70
+ if train_size == "":
71
+ train_size = 0
72
+ else:
73
+ train_size = int(train_size)
74
+
75
+ adjusted_train_size = train_size // len(dataset.all_template_names)
76
+
77
+ output.append(
78
+ (
79
+ f"{dataset_name} {subset_name if subset_name else ''}",
80
+ f"{OG}-{non_OG}",
81
+ f"{train_size:,} {adjusted_train_size:,}",
82
+ )
83
+ )
84
+
85
+ pprint(output)
86
+ print(len(template_collection))
87
+
88
+ print("Missing metrics:")
89
+ pprint(missing_metrics)
90
+
91
+ print("Missing original task flags:")
92
+ pprint(missing_og_flags)
93
+
94
+ # # print(d4_train_mixture)
95
+ # print(f"Number of training templates = {len(d4_train_mixture)}")
96
+ # # print(d4_eval_mixture)
97
+ # print(f"Number of evaluation templates = {len(d4_eval_mixture)}")
98
+ # # for i in seqio.TaskRegistry.names():
99
+ # # print(i)
100
+ # print(f"Number of SeqIO registered templates = {len(seqio.TaskRegistry.names())}")
101
+ # print("^ includes non-original task templates which are excluded from the eval mixture")
102
+
103
+
104
+ if __name__ == "__main__":
105
+ preview()
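The adjusted_train_size printed above divides a dataset's training examples evenly across its templates; tasks.py (the next file) applies the same division only once a dataset exceeds a global budget. A self-contained sketch of that capping rule, with illustrative sizes and template counts:

    MAX_EXAMPLES_PER_DATASET = 500_000  # same constant as in tasks.py below

    def per_template_cap(train_size: int, num_templates: int) -> int:
        """Large datasets share the global budget across their templates."""
        if train_size > MAX_EXAMPLES_PER_DATASET:
            return MAX_EXAMPLES_PER_DATASET // num_templates
        return train_size

    # Illustrative numbers only: a 3.6M-example dataset with 9 templates is
    # capped at 55,555 examples per template; a small dataset is left intact.
    assert per_template_cap(3_600_000, 9) == 55_555
    assert per_template_cap(8_530, 10) == 8_530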
promptsource/seqio_tasks/tasks.py ADDED
@@ -0,0 +1,421 @@
1
+ import csv
2
+ import functools
3
+ from typing import Dict, List, Optional, Tuple
4
+
5
+ import datasets
6
+ import pkg_resources
7
+ import seqio
8
+ import t5
9
+ import tensorflow as tf
10
+ from t5.data.glue_utils import get_glue_metric, get_super_glue_metric
11
+ from t5.evaluation import metrics as mt
12
+
13
+ import promptsource.templates
14
+ from promptsource.seqio_tasks import utils
15
+
16
+
17
+ GET_METRICS = {
18
+ "BLEU": mt.bleu,
19
+ "ROUGE": mt.rouge,
20
+ "Span Squad": mt.span_squad,
21
+ "Squad": mt.squad,
22
+ "Trivia QA": mt.trivia_qa,
23
+ "Accuracy": mt.accuracy,
24
+ "Sequence Accuracy": mt.sequence_accuracy,
25
+ "Pearson Correlation": mt.pearson_corrcoef,
26
+ "Spearman Correlation": mt.spearman_corrcoef,
27
+ "MultiRC": mt.multirc_f1_over_all_answers,
28
+ "AUC": mt.auc,
29
+ "COQA F1": mt.coqa_f1,
30
+ "Edit Distance": mt.edit_distance,
31
+ # "Mean Reciprocal Rank": mt.accuracy, # NOTE not in T5?
32
+ "Other": mt.accuracy,
33
+ # Missing support for mean_multiclass_f1 etc. which need a num_classes parameter
34
+ }
35
+
36
+ MAX_EXAMPLES_PER_DATASET = 500_000
37
+
38
+
39
+ def strip_whitespace(output_or_target, example=None, is_target=False):
40
+ """Cached tasks from promptsource all have a leading space on the ground-truth targets."""
41
+ return output_or_target.strip()
42
+
43
+
44
+ def maybe_get_class_id_postprocessor(template):
45
+ if template.get_fixed_answer_choices_list():
46
+
47
+ def postprocess_fn(output_or_target, example=None, is_target=False):
48
+ output_or_target = strip_whitespace(output_or_target)
49
+ return t5.data.postprocessors.string_label_to_class_id(
50
+ output_or_target, label_classes=template.get_fixed_answer_choices_list()
51
+ )
52
+
53
+ return postprocess_fn
54
+
55
+ else:
56
+ return strip_whitespace
57
+
58
+
59
+ def get_tf_dataset(split, shuffle_files, seed, dataset_name, subset_name, template, split_mapping):
60
+ # HF datasets does not support file-level shuffling
61
+ del shuffle_files, seed
62
+ dataset = datasets.load_dataset(dataset_name, subset_name)
63
+ dataset = dataset[split_mapping[split]]
64
+ dataset = utils.apply_template(dataset, template)
65
+ return utils.hf_dataset_to_tf_dataset(dataset)
66
+
67
+
68
+ def add_task(dataset_name, subset_name, template_name, task_name=None, split_mapping=None):
69
+ template = all_templates.get_dataset(dataset_name, subset_name)[template_name]
70
+ task_name = task_name or utils.get_task_name(dataset_name, subset_name, template_name)
71
+
72
+ if dataset_name == "glue":
73
+ metrics = get_glue_metric(subset_name)
74
+ elif dataset_name == "super_glue":
75
+ if subset_name in ("wsc.fixed", "multirc"):
76
+ # TODO: WSC and MultiRC need special pre/postprocessing
77
+ metrics = [mt.accuracy]
78
+ else:
79
+ metrics = get_super_glue_metric(subset_name)
80
+ else:
81
+ # TODO what if metric is null?
82
+ metrics = [GET_METRICS[m] for m in template.metadata.metrics]
83
+
84
+ dataset_splits = utils.get_dataset_splits(dataset_name, subset_name)
85
+ split_mapping = split_mapping or {k: k for k in dataset_splits.keys()}
86
+
87
+ dataset_fn = functools.partial(
88
+ get_tf_dataset,
89
+ seed=None,
90
+ dataset_name=dataset_name,
91
+ subset_name=subset_name,
92
+ template=template,
93
+ split_mapping=split_mapping,
94
+ )
95
+ data_source = seqio.FunctionDataSource(
96
+ dataset_fn,
97
+ splits=list(split_mapping.keys()),
98
+ num_input_examples={s: dataset_splits[split_mapping[s]].num_examples for s in split_mapping.keys()},
99
+ )
100
+ output_features = {
101
+ "inputs": seqio.Feature(t5.data.get_default_vocabulary(), add_eos=False, dtype=tf.int32),
102
+ "targets": seqio.Feature(t5.data.get_default_vocabulary(), add_eos=True, dtype=tf.int32),
103
+ }
104
+ preprocessors = [
105
+ seqio.preprocessors.tokenize,
106
+ seqio.preprocessors.append_eos,
107
+ seqio.CacheDatasetPlaceholder(required=False),
108
+ ]
109
+
110
+ # Add train and normal eval tasks
111
+ seqio.TaskRegistry.add(
112
+ task_name,
113
+ data_source,
114
+ preprocessors=preprocessors,
115
+ output_features=output_features,
116
+ metric_fns=metrics,
117
+ postprocess_fn=maybe_get_class_id_postprocessor(template),
118
+ )
119
+
120
+ # Add rank classification eval task
121
+ if template.answer_choices:
122
+ rank_classification_preprocessor = functools.partial(
123
+ t5.data.preprocessors.rank_classification,
124
+ inputs_fn=lambda ex: tf.fill((len(ex["answer_choices"]),), ex["inputs"]),
125
+ targets_fn=lambda ex: ex["answer_choices"],
126
+ is_correct_fn=lambda ex: tf.equal(ex["answer_choices"], tf.strings.strip(ex["targets"])),
127
+ weight_fn=lambda ex: 1.0,
128
+ )
129
+
130
+ fixed_choices = template.get_fixed_answer_choices_list()
131
+ num_classes = len(fixed_choices) if fixed_choices else None
132
+ seqio.TaskRegistry.add(
133
+ task_name + "_score_eval",
134
+ data_source,
135
+ preprocessors=[rank_classification_preprocessor] + preprocessors,
136
+ output_features=output_features,
137
+ metric_fns=[functools.partial(t5.evaluation.metrics.rank_classification, num_classes=num_classes)],
138
+ postprocess_fn=t5.data.postprocessors.rank_classification,
139
+ )
140
+
141
+
142
+ dataset_subset_tuple = Tuple[str, Optional[str]]
143
+ d4_train: List[dataset_subset_tuple] = []
144
+ d4_eval: List[dataset_subset_tuple] = []
145
+ d3_train_gpt: List[dataset_subset_tuple] = []
146
+ d3_train_sglue: List[dataset_subset_tuple] = []
147
+ bias_fairness_eval: List[dataset_subset_tuple] = []
148
+ gsheet: Dict[dataset_subset_tuple, Dict] = {}
149
+ experiment_path = pkg_resources.resource_filename(__name__, "experiment_D4.csv")
150
+ with open(experiment_path) as exp_file:
151
+ reader = csv.DictReader(exp_file)
152
+ for row in reader:
153
+ if row["skip"]:
154
+ continue
155
+ if row["subset"] == "":
156
+ row["subset"] = None # to match promptsource.Template object
157
+ dataset_subset = (row["HF_name"], row["subset"])
158
+ if row["do_train"] == "TRUE":
159
+ d4_train.append(dataset_subset)
160
+ if row["do_eval"] == "TRUE":
161
+ d4_eval.append(dataset_subset)
162
+ if row["D3_do_train"] == "TRUE" and "GPT" in row["seed_paper"]:
163
+ d3_train_gpt.append(dataset_subset)
164
+ if row["D3_do_train"] == "TRUE" and row["HF_name"] == "super_glue":
165
+ d3_train_sglue.append(dataset_subset)
166
+ if (
167
+ row["do_eval"] == "TRUE"
168
+ and row["task_by_convention"] == "bias_and_fairness"
169
+ and row["HF_name"] != "winogender"
170
+ ):
171
+ bias_fairness_eval.append(dataset_subset)
172
+ gsheet[dataset_subset] = row
173
+ all_datasets = d4_train + d4_eval + d3_train_gpt + d3_train_sglue + bias_fairness_eval
174
+
175
+ all_templates = promptsource.templates.TemplateCollection()
176
+ all_templates.remove("anli") # Need to special-case ANLI due to weird split conventions
177
+
178
+ # 3 stages of training/ablation: D4 -> GPT -> SuperGLUE
179
+ d4_train_mixture: List[str] = [] # strings are dataset_subset_template
180
+ gpt_train_mixture: List[str] = []
181
+ sglue_train_mixture: List[str] = []
182
+ d4_eval_mixture: List[str] = []
183
+ bias_fairness_eval_mixture: List[str] = []
184
+ mixture_cap: Dict[str, int] = {}
185
+ single_original_task: Dict[Tuple[str, str], str] = {}
186
+ all_original_tasks: List[str] = []
187
+ for dataset_name, subset_name in all_templates.keys:
188
+ if (dataset_name, subset_name) not in all_datasets:
189
+ all_templates.remove(dataset_name, subset_name)
190
+ continue
191
+
192
+ dataset = all_templates.get_dataset(dataset_name, subset_name)
193
+ num_templates = len(dataset.all_template_names)
194
+ train_size = gsheet[(dataset_name, subset_name)]["train_size"]
195
+ if train_size == "":
196
+ train_size = 0
197
+ else:
198
+ train_size = int(train_size)
199
+ if train_size > MAX_EXAMPLES_PER_DATASET:
200
+ cap = MAX_EXAMPLES_PER_DATASET // num_templates
201
+ else:
202
+ cap = train_size
203
+ for template_name in dataset.all_template_names:
204
+ add_task(dataset_name, subset_name, template_name)
205
+
206
+ template = dataset[template_name]
207
+
208
+ task_name = utils.get_task_name(dataset_name, subset_name, template_name)
209
+
210
+ if (dataset_name, subset_name) not in single_original_task and template.metadata.original_task:
211
+ single_original_task[(dataset_name, subset_name)] = task_name
212
+
213
+ if template.metadata.original_task:
214
+ all_original_tasks.append(task_name)
215
+
216
+ if (dataset_name, subset_name) in d4_train:
217
+ d4_train_mixture.append(task_name)
218
+ mixture_cap[task_name] = cap
219
+ if (dataset_name, subset_name) in d3_train_gpt:
220
+ gpt_train_mixture.append(task_name)
221
+ mixture_cap[task_name] = cap
222
+ if (dataset_name, subset_name) in d3_train_sglue:
223
+ sglue_train_mixture.append(task_name)
224
+ mixture_cap[task_name] = cap
225
+ if (dataset_name, subset_name) in d4_eval:
226
+ if template.metadata.original_task:
227
+ d4_eval_mixture.append(task_name)
228
+ # TODO use template.metadata.answer_choices here for rank eval
229
+ if (dataset_name, subset_name) in bias_fairness_eval:
230
+ bias_fairness_eval_mixture.append(task_name)
231
+
232
+ # Special case for ANLI, which has weirdly-named splits and rounds that should be subsets
233
+ dataset_name, subset_name = ("anli", None)
234
+ dataset = all_templates.get_dataset(dataset_name, subset_name)
235
+ for anli_round in ("r1", "r2", "r3"):
236
+ for template_name in all_templates.get_dataset(dataset_name, subset_name).all_template_names:
237
+ task_name = utils.get_task_name(dataset_name, subset_name, template_name) + f"_{anli_round}"
238
+ split_mapping = {
239
+ "train": f"train_{anli_round}",
240
+ "validation": f"dev_{anli_round}",
241
+ "test": f"test_{anli_round}",
242
+ }
243
+ add_task(dataset_name, subset_name, template_name, task_name, split_mapping)
244
+
245
+ template = dataset[template_name]
246
+ if template.metadata.original_task:
247
+ d4_eval_mixture.append(task_name) # TODO or add to ANLI special mixture
248
+ # TODO use template.metadata.answer_choices here for rank eval
249
+
250
+
251
+ TASK_BLACKLIST = [
252
+ # Tasks which often tokenize to > 1024 tokens currently
253
+ "hotpot_qa_distractor_Generate_Explanations",
254
+ "hotpot_qa_fullwiki_Generate_Explanations",
255
+ "hotpot_qa_distractor_Generate_Answer_and_Explanations",
256
+ "hotpot_qa_fullwiki_Generate_Answer_and_Explanations",
257
+ "hotpot_qa_fullwiki_Generate_Answer",
258
+ "hotpot_qa_distractor_Generate_Answer",
259
+ "hotpot_qa_distractor_Generate_Title_2",
260
+ "hotpot_qa_fullwiki_Generate_Title_2",
261
+ "hotpot_qa_fullwiki_Generate_Title_1",
262
+ "hotpot_qa_distractor_Generate_Title_1",
263
+ "hotpot_qa_distractor_Generate_Question",
264
+ "hotpot_qa_fullwiki_Generate_Question",
265
+ "tab_fact_tab_fact_tab_fact_3",
266
+ "tab_fact_tab_fact_tab_fact_2",
267
+ "tab_fact_tab_fact_tab_fact_1",
268
+ "tab_fact_tab_fact_tab_fact_7",
269
+ "tab_fact_tab_fact_tab_fact_4",
270
+ "tab_fact_tab_fact_tab_fact_5",
271
+ "tab_fact_tab_fact_tab_fact_6",
272
+ "wiki_hop_masked_Choose_Best_Object_Candidate",
273
+ "wiki_hop_masked_Indirect_Question_about_Birthplace_Citizenship_Place_of_Death",
274
+ "narrativeqa_Template_05",
275
+ "ecthr_cases_alleged_violation_prediction_silver_rationales",
276
+ # Tasks with broken cached files
277
+ "gigaword_summarize_",
278
+ ]
279
+
280
+ # Tasks that failed caching (won't try to fix them for now) - remove when we are done
281
+ D4_TRAIN_SCORE_EVAL_TASK_BLACKLIST = [
282
+ "amazon_polarity_Is_this_product_review_positive_score_eval",
283
+ "amazon_polarity_Is_this_review_negative_score_eval",
284
+ "amazon_polarity_Is_this_review_score_eval",
285
+ "amazon_polarity_User_recommend_this_product_score_eval",
286
+ "amazon_polarity_convey_negative_or_positive_sentiment_score_eval",
287
+ "amazon_polarity_flattering_or_not_score_eval",
288
+ "amazon_polarity_negative_or_positive_tone_score_eval",
289
+ "amazon_polarity_user_satisfied_score_eval",
290
+ "amazon_polarity_would_you_buy_score_eval",
291
+ "dbpedia_14_given_a_choice_of_categories__score_eval",
292
+ "dbpedia_14_given_list_what_category_does_the_paragraph_belong_to_score_eval",
293
+ "dbpedia_14_pick_one_category_for_the_following_text_score_eval",
294
+ "wiki_hop_original_choose_best_object_affirmative_1_score_eval",
295
+ "wiki_hop_original_choose_best_object_affirmative_2_score_eval",
296
+ "wiki_hop_original_choose_best_object_affirmative_3_score_eval",
297
+ "wiki_hop_original_choose_best_object_interrogative_1_score_eval",
298
+ "wiki_hop_original_choose_best_object_interrogative_2_score_eval",
299
+ ]
300
+
301
+ seqio.MixtureRegistry.add(
302
+ "d4_train",
303
+ [task for task in d4_train_mixture if task not in TASK_BLACKLIST],
304
+ default_rate=lambda t: mixture_cap[t.name],
305
+ )
306
+
307
+ seqio.MixtureRegistry.add(
308
+ "gpt_train",
309
+ [task for task in gpt_train_mixture if task not in TASK_BLACKLIST],
310
+ default_rate=lambda t: mixture_cap[t.name],
311
+ )
312
+
313
+ seqio.MixtureRegistry.add(
314
+ "sglue_train",
315
+ [task for task in sglue_train_mixture if task not in TASK_BLACKLIST],
316
+ default_rate=lambda t: mixture_cap[t.name],
317
+ )
318
+
319
+ seqio.MixtureRegistry.add(
320
+ "d4_gpt_train",
321
+ [task for task in d4_train_mixture + gpt_train_mixture if task not in TASK_BLACKLIST],
322
+ default_rate=lambda t: mixture_cap[t.name],
323
+ )
324
+
325
+ seqio.MixtureRegistry.add(
326
+ "d4_gpt_sglue_train",
327
+ [task for task in d4_train_mixture + gpt_train_mixture + sglue_train_mixture if task not in TASK_BLACKLIST],
328
+ default_rate=lambda t: mixture_cap[t.name],
329
+ )
330
+
331
+ seqio.MixtureRegistry.add(
332
+ "d4_eval",
333
+ [task for task in d4_eval_mixture if task not in TASK_BLACKLIST],
334
+ default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
335
+ ) # eval mixture does not need to be capped
336
+
337
+
338
+ seqio.MixtureRegistry.add(
339
+ "d4_score_eval",
340
+ [
341
+ task
342
+ for task in seqio.TaskRegistry.names()
343
+ if task.endswith("_score_eval")
344
+ and task.split("_score_eval")[0] in d4_eval_mixture
345
+ and task.split("_score_eval")[0] not in TASK_BLACKLIST
346
+ ],
347
+ default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
348
+ )
349
+
350
+ # Train tasks we don't care about evaluating on
351
+ D4_TRAIN_SKIP_EVAL = [
352
+ "paws_labeled_final",
353
+ "adversarial_qa_dbidaf",
354
+ "adversarial_qa_dbert",
355
+ "duorc_ParaphraseRC",
356
+ "dream",
357
+ "amazon_polarity",
358
+ "app_reviews",
359
+ "imdb",
360
+ "wiki_bio",
361
+ "gigaword",
362
+ "multi_news",
363
+ "samsum",
364
+ "dbpedia_14",
365
+ "trec",
366
+ ]
367
+
368
+ seqio.MixtureRegistry.add(
369
+ "d4_train_eval",
370
+ [
371
+ task
372
+ for task in d4_train_mixture
373
+ if task not in TASK_BLACKLIST
374
+ and not any([skip in task for skip in D4_TRAIN_SKIP_EVAL])
375
+ and task in all_original_tasks
376
+ ],
377
+ default_rate=lambda t: mixture_cap[t.name],
378
+ )
379
+
380
+ seqio.MixtureRegistry.add(
381
+ "d4_train_score_eval",
382
+ [
383
+ task
384
+ for task in seqio.TaskRegistry.names()
385
+ if task.endswith("_score_eval")
386
+ and task.split("_score_eval")[0] in d4_train_mixture
387
+ and task.split("_score_eval")[0] not in TASK_BLACKLIST
388
+ and task not in D4_TRAIN_SCORE_EVAL_TASK_BLACKLIST
389
+ and not any([skip in task for skip in D4_TRAIN_SKIP_EVAL])
390
+ and task.split("_score_eval")[0] in all_original_tasks
391
+ ],
392
+ default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
393
+ )
394
+
395
+ seqio.MixtureRegistry.add(
396
+ "d4_train_one_og_prompt",
397
+ [task for task in single_original_task.values() if task in d4_train_mixture and task not in TASK_BLACKLIST],
398
+ default_rate=lambda t: mixture_cap[t.name],
399
+ )
400
+
401
+ seqio.MixtureRegistry.add(
402
+ "d4_train_all_og_prompts",
403
+ [task for task in all_original_tasks if task in d4_train_mixture and task not in TASK_BLACKLIST],
404
+ default_rate=lambda t: mixture_cap[t.name],
405
+ )
406
+
407
+ seqio.MixtureRegistry.add(
408
+ "bias_fairness_eval",
409
+ bias_fairness_eval_mixture,
410
+ default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
411
+ )
412
+
413
+ seqio.MixtureRegistry.add(
414
+ "bias_fairness_eval_score_eval",
415
+ [
416
+ task
417
+ for task in seqio.TaskRegistry.names()
418
+ if task.endswith("_score_eval") and task.split("_score_eval")[0] in bias_fairness_eval_mixture
419
+ ],
420
+ default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
421
+ )
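[Editor's note] The mixtures registered above are consumed through SeqIO's registry. A minimal sketch of the read path, not part of this commit: the mixture name is real, but the sequence lengths are assumed values, and the sketch presumes the tasks registered by add_task define the usual text-to-text output features.

import seqio

mixture = seqio.get_mixture_or_task("d4_train")
ds = mixture.get_dataset(
    sequence_length={"inputs": 1024, "targets": 256},  # assumed lengths, not from this commit
    split="train",
    shuffle=True,
)
for ex in ds.take(1):
    print(ex)

Because each training task's rate is `mixture_cap[t.name]`, tasks are sampled in proportion to their capped example counts, so very large datasets cannot drown out small ones.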
promptsource/seqio_tasks/utils.py ADDED
@@ -0,0 +1,77 @@
+ import re
+
+ import datasets
+ import tensorflow as tf
+
+ import promptsource.utils
+
+
+ def feature_to_spec(feature, length=False):
+     if isinstance(feature, datasets.ClassLabel):
+         return tf.TensorSpec(shape=() if not length else (None if length == -1 else length,), dtype=tf.int64)
+     elif isinstance(feature, datasets.Value):
+         return tf.TensorSpec(
+             shape=() if not length else (None if length == -1 else length,), dtype=getattr(tf.dtypes, feature.dtype)
+         )
+     elif hasattr(feature, "dtype") and hasattr(feature, "shape"):
+         return tf.TensorSpec(shape=feature.shape, dtype=feature.dtype)
+     elif isinstance(feature, datasets.Sequence):
+         return feature_to_spec(feature.feature, length=feature.length)
+     elif isinstance(feature, list):
+         return [feature_to_spec(f, length=length) for f in feature]
+     elif isinstance(feature, dict):
+         return {k: feature_to_spec(v, length=length) for k, v in feature.items()}
+     else:
+         raise ValueError(f"Unparseable feature type {type(feature)}")
+
+
+ def hf_dataset_to_tf_dataset(dataset):
+     return tf.data.Dataset.from_generator(
+         dataset.__iter__, output_signature={k: feature_to_spec(v) for k, v in dataset.features.items()}
+     )
+
+
+ def apply_template(dataset, template):
+     def map_fn(ex):
+         ex = promptsource.utils.removeHyphen(ex)
+         inputs_and_targets = template.apply(ex)
+         answer_choices = template.get_answer_choices_list(ex)
+         if len(inputs_and_targets) == 2:
+             inputs, targets = inputs_and_targets
+             if targets == "":
+                 ex = {"inputs": inputs, "targets": "<NO LABEL>"}
+             else:
+                 ex = {"inputs": inputs, "targets": targets}
+         # When template results in an empty example, template.apply returns [""]
+         # Also, if the template gets split wrong, len can be > 2
+         # We will filter these out later
+         else:
+             ex = {"inputs": "", "targets": ""}
+
+         if answer_choices:
+             ex["answer_choices"] = answer_choices
+
+         return ex
+
+     def filter_fn(ex):
+         return len(ex["inputs"]) > 0 and len(ex["targets"]) > 0
+
+     original_columns = dataset.column_names
+     dataset = dataset.map(map_fn).filter(filter_fn)
+     # map keeps original columns, remove them
+     return dataset.remove_columns(set(original_columns) - {"inputs", "targets", "answer_choices"})
+
+
+ def get_dataset_splits(dataset_name, subset_name=None):
+     info = datasets.get_dataset_infos(dataset_name)
+     subset_name = subset_name or list(info.keys())[0]
+     return info[subset_name].splits
+
+
+ def task_clean(text):
+     # Clean the text according to allowed characters for a task name
+     return re.sub(r"[^\w\d\._]+", "_", text)
+
+
+ def get_task_name(dataset_name, subset_name, template_name):
+     return task_clean(dataset_name + (f"_{subset_name}_" if subset_name is not None else "_") + template_name)
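[Editor's note] A quick illustration of the name sanitization above (illustrative, not part of the diff): task_clean keeps word characters, digits, dots and underscores, and collapses any other run of characters to a single underscore, so template names with spaces or hyphens become valid SeqIO task names.

from promptsource.seqio_tasks.utils import get_task_name

print(get_task_name("ade_corpus_v2", "Ade_corpus_v2_classification", "label-to-text"))
# ade_corpus_v2_Ade_corpus_v2_classification_label_to_text
print(get_task_name("boolq", None, "GPT-3 Style"))
# boolq_GPT_3_Style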
promptsource/session.py ADDED
@@ -0,0 +1,89 @@
+ #
+ # Code for managing session state, which is needed for multi-input forms
+ # See https://github.com/streamlit/streamlit/issues/1557
+ #
+ # This code is taken from
+ # https://gist.github.com/okld/0aba4869ba6fdc8d49132e6974e2e662
+ #
+ from streamlit.hashing import _CodeHasher
+ from streamlit.report_thread import get_report_ctx
+ from streamlit.server.server import Server
+
+
+ class _SessionState:
+     def __init__(self, session, hash_funcs):
+         """Initialize SessionState instance."""
+         self.__dict__["_state"] = {
+             "data": {},
+             "hash": None,
+             "hasher": _CodeHasher(hash_funcs),
+             "is_rerun": False,
+             "session": session,
+         }
+
+     def __call__(self, **kwargs):
+         """Initialize state data once."""
+         for item, value in kwargs.items():
+             if item not in self._state["data"]:
+                 self._state["data"][item] = value
+
+     def __getitem__(self, item):
+         """Return a saved state value, None if item is undefined."""
+         return self._state["data"].get(item, None)
+
+     def __getattr__(self, item):
+         """Return a saved state value, None if item is undefined."""
+         return self._state["data"].get(item, None)
+
+     def __setitem__(self, item, value):
+         """Set state value."""
+         self._state["data"][item] = value
+
+     def __setattr__(self, item, value):
+         """Set state value."""
+         self._state["data"][item] = value
+
+     def clear(self):
+         """Clear session state and request a rerun."""
+         self._state["data"].clear()
+         self._state["session"].request_rerun(None)
+
+     def sync(self):
+         """
+         Rerun the app with all state values up to date from the beginning to
+         fix rollbacks.
+         """
+         data_to_bytes = self._state["hasher"].to_bytes(self._state["data"], None)
+
+         # Ensure to rerun only once to avoid infinite loops
+         # caused by a constantly changing state value at each run.
+         #
+         # Example: state.value += 1
+         if self._state["is_rerun"]:
+             self._state["is_rerun"] = False
+
+         elif self._state["hash"] is not None:
+             if self._state["hash"] != data_to_bytes:
+                 self._state["is_rerun"] = True
+                 self._state["session"].request_rerun(None)
+
+         self._state["hash"] = data_to_bytes
+
+
+ def _get_session():
+     session_id = get_report_ctx().session_id
+     session_info = Server.get_current()._get_session_info(session_id)
+
+     if session_info is None:
+         raise RuntimeError("Couldn't get your Streamlit Session object.")
+
+     return session_info.session
+
+
+ def _get_state(hash_funcs=None):
+     session = _get_session()
+
+     if not hasattr(session, "_custom_session_state"):
+         session._custom_session_state = _SessionState(session, hash_funcs)
+
+     return session._custom_session_state
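[Editor's note] For context, a minimal Streamlit sketch of how this state object is used (an assumption-laden example: it relies on the private Streamlit internals imported above, which only exist in the pre-1.0 releases this module targets, before st.session_state was introduced):

import streamlit as st

from promptsource.session import _get_state

state = _get_state()
state(counter=0)  # seed default values once; no-op on reruns
if st.button("Increment"):
    state.counter += 1  # __getattr__/__setattr__ route through the state dict
st.write(state.counter)
state.sync()  # hash the state and trigger at most one rerun if it changed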
promptsource/templates.py ADDED
@@ -0,0 +1,515 @@
+ import os
+ import random
+ import uuid
+ from collections import Counter, defaultdict
+ from shutil import rmtree
+ from typing import Dict, List, Optional, Tuple
+
+ import pandas as pd
+ import pkg_resources
+ import yaml
+ from jinja2 import BaseLoader, Environment, meta
+
+
+ # Truncation of jinja template variables
+ # 1710 = 300 words x 4.7 avg characters per word + 300 spaces
+ TEXT_VAR_LENGTH = 2048
+
+ # Local path to the folder containing the templates
+ TEMPLATES_FOLDER_PATH = pkg_resources.resource_filename(__name__, "templates")
+
+ env = Environment(loader=BaseLoader)
+
+ # Allow the python function zip()
+ env.globals.update(zip=zip)
+
+ # These are users whose datasets should be included in the results returned by
+ # filter_english_datasets (regardless of their metadata)
+ INCLUDED_USERS = {"Zaid", "craffel"}
+
+
+ def highlight(input):
+     return "<span style='color: #F08080'>" + input + "</span>"
+
+
+ def choice(choices):
+     return random.choice(choices)
+
+
+ def most_frequent(items):
+     """Returns the set of items which appear most frequently in the input"""
+     if not items:
+         return
+     item_counts = Counter(items).most_common()
+     max_freq = item_counts[0][1]
+     most_frequent_items = [c[0] for c in item_counts if c[1] == max_freq]
+     return most_frequent_items
+
+
+ env.filters["highlight"] = highlight
+ env.filters["choice"] = choice
+ env.filters["most_frequent"] = most_frequent
+
+
+ class Template(yaml.YAMLObject):
+     """
+     A prompt template.
+     """
+
+     yaml_tag = "!Template"
+
+     def __init__(self, name, jinja, reference, metadata=None, answer_choices=None):
+         """
+         Creates a prompt template.
+
+         A prompt template is expressed in Jinja. It is rendered using an example
+         from the corresponding Hugging Face datasets library (a dictionary). The
+         separator ||| should appear once to divide the template into prompt and
+         output. Generally, the prompt should provide information on the desired
+         behavior, e.g., text passage and instructions, and the output should be
+         a desired response.
+
+         :param name: unique name (per dataset) for template
+         :param jinja: template expressed in Jinja
+         :param reference: string describing author or paper reference for template
+         :param metadata: a Metadata object with template annotations
+         :param answer_choices: Jinja expression for answer choices. Should produce
+                                a ||| delimited string of choices that enumerates
+                                the possible completions for templates that should
+                                be evaluated as ranked completions. If None, then
+                                the template is open-ended. This list is accessible
+                                from within Jinja as the variable `answer_choices`.
+         """
+         self.id = str(uuid.uuid4())
+         self.name = name
+         self.jinja = jinja
+         self.reference = reference
+         self.metadata = metadata if metadata is not None else Template.Metadata()
+         self.answer_choices = answer_choices
+
+     def get_id(self):
+         """
+         Returns the id of the template
+
+         :return: unique id for template
+         """
+         return self.id
+
+     def get_name(self):
+         """
+         Returns the name of the template
+
+         :return: unique (per dataset) name for template
+         """
+         return self.name
+
+     def get_reference(self):
+         """
+         Returns the bibliographic reference (or author) for the template
+
+         :return: reference as a string
+         """
+         return self.reference
+
+     def get_answer_choices_expr(self):
+         """
+         Returns a Jinja expression for computing the answer choices from an example.
+
+         :return: String, or None if no answer choices
+         """
+         return self.answer_choices
+
+     def get_answer_choices_list(self, example):
+         """
+         Returns a list of answer choices for a given example
+
+         :return: list of strings, or None if get_answer_choices_expr is None
+         """
+         jinja = self.get_answer_choices_expr()
+         if jinja is None:
+             return None
+
+         rtemplate = env.from_string(jinja)
+         protected_example = self._escape_pipe(example)
+         rendered_choices = rtemplate.render(**protected_example)
+         return [self._unescape_pipe(answer_choice.strip()) for answer_choice in rendered_choices.split("|||")]
+
+     def get_fixed_answer_choices_list(self):
+         """
+         Returns a list of answer choices that is static across examples, if possible
+
+         :return: list of strings, or None if no static list exists
+         """
+         jinja = self.get_answer_choices_expr()
+         if jinja is None:
+             return None
+
+         parse = env.parse(jinja)
+         variables = meta.find_undeclared_variables(parse)
+         if len(variables) == 0:
+             rtemplate = env.from_string(jinja)
+             rendered_choices = rtemplate.render()
+             return [answer_choice.strip() for answer_choice in rendered_choices.split("|||")]
+         else:
+             return None
+
+     def apply(self, example, truncate=True, highlight_variables=False):
+         """
+         Creates a prompt by applying this template to an example
+
+         :param example: the dataset example to create a prompt for
+         :param truncate: if True, example fields will be truncated to TEXT_VAR_LENGTH chars
+         :param highlight_variables: highlight the added variables
+         :return: tuple of 2 strings, for prompt and output
+         """
+         jinja = self.jinja
+
+         # Truncates the prompt if needed
+         if truncate:
+             trunc_command = (
+                 f" | string | truncate({TEXT_VAR_LENGTH}) }}}}"  # Escaping curly braces requires doubling them
+             )
+             jinja = jinja.replace("}}", trunc_command)
+
+         # Highlights text that was substituted for variables, if requested
+         if highlight_variables:
+             jinja = jinja.replace("}}", " | highlight }}")
+         rtemplate = env.from_string(jinja)
+
+         protected_example = self._escape_pipe(example)
+
+         # Adds in answer_choices variable
+         if "answer_choices" in protected_example:
+             raise ValueError("Example contains the restricted key 'answer_choices'.")
+
+         protected_example["answer_choices"] = self.get_answer_choices_list(example)
+
+         # Renders the Jinja template
+         rendered_example = rtemplate.render(**protected_example)
+
+         # Splits on the separator, and then replaces back any occurrences of the
+         # separator in the original example
+         return [self._unescape_pipe(part).strip() for part in rendered_example.split("|||")]
+
+     pipe_protector = "3ed2dface8203c4c9dfb1a5dc58e41e0"
+
+     @classmethod
+     def _escape_pipe(cls, example):
+         # Replaces any occurrences of the "|||" separator in the example,
+         # which will be replaced back after splitting
+         protected_example = {
+             key: value.replace("|||", cls.pipe_protector) if isinstance(value, str) else value
+             for key, value in example.items()
+         }
+         return protected_example
+
+     @classmethod
+     def _unescape_pipe(cls, string):
+         # replaces back any occurrences of the separator in a string
+         return string.replace(cls.pipe_protector, "|||")
+
+     class Metadata(yaml.YAMLObject):
+         """
+         Metadata for a prompt template.
+         """
+
+         yaml_tag = "!TemplateMetadata"
+
+         def __init__(
+             self,
+             original_task: Optional[bool] = None,
+             choices_in_prompt: Optional[bool] = None,
+             metrics: Optional[List[str]] = None,
+         ):
+             """
+             Initializes template metadata.
+
+             In the following, trivial choices are defined as Yes/No, True/False,
+             etc. and nontrivial choices are other types of choices denoted in
+             the answer_choices field.
+
+             :param original_task: If True, this prompt asks a model to perform the original task designed for
+                 this dataset.
+             :param choices_in_prompt: If True, the answer choices are included in the templates such that models
+                 see those choices in the input. Only applicable to classification tasks.
+             :param metrics: List of strings denoting metrics to use for evaluation
+             """
+             self.original_task = original_task
+             self.choices_in_prompt = choices_in_prompt
+             self.metrics = metrics
+
+
+ class TemplateCollection:
+     """
+     This helper class wraps the DatasetTemplates class
+     - Initializes a DatasetTemplates for each existing template folder
+     - Gives access to each DatasetTemplates
+     - Provides aggregated counts over all DatasetTemplates
+     """
+
+     def __init__(self):
+
+         # Dict of all the DatasetTemplates, key is the tuple (dataset_name, subset_name)
+         self.datasets_templates: Dict[Tuple[str, Optional[str]], DatasetTemplates] = self._collect_datasets()
+
+     @property
+     def keys(self):
+         return list(self.datasets_templates.keys())
+
+     def __len__(self) -> int:
+         return len(self.datasets_templates)
+
+     def remove(self, dataset_name: str, subset_name: Optional[str] = None) -> None:
+         del self.datasets_templates[dataset_name, subset_name]
+
+     def _collect_datasets(self) -> Dict[Tuple[str, str], "DatasetTemplates"]:
+         """
+         Initialize a DatasetTemplates object for each templates.yaml detected in the templates folder
+
+         Returns: a dict with key=(dataset_name, subset_name)
+         """
+         dataset_folders = os.listdir(TEMPLATES_FOLDER_PATH)
+         dataset_folders = [folder for folder in dataset_folders if not folder.startswith(".")]
+
+         output = {}  # format is {(dataset_name, subset_name): DatasetsTemplates}
+         for dataset in dataset_folders:
+             if dataset in INCLUDED_USERS:
+                 for filename in os.listdir(os.path.join(TEMPLATES_FOLDER_PATH, dataset)):
+                     output = {**output, **self._collect_dataset(dataset + "/" + filename)}
+             else:
+                 output = {**output, **self._collect_dataset(dataset)}
+         return output
+
+     def _collect_dataset(self, dataset):
+         output = {}  # format is {(dataset_name, subset_name): DatasetsTemplates}
+         for filename in os.listdir(os.path.join(TEMPLATES_FOLDER_PATH, dataset)):
+             if filename.endswith(".yaml"):
+                 # If there is no sub-folder, there is no subset for this dataset
+                 output[(dataset, None)] = DatasetTemplates(dataset)
+             else:
+                 # This is a subfolder, and its name corresponds to the subset name
+                 output[(dataset, filename)] = DatasetTemplates(dataset_name=dataset, subset_name=filename)
+         return output
+
+     def get_dataset(self, dataset_name: str, subset_name: Optional[str] = None) -> "DatasetTemplates":
+         """
+         Return the DatasetTemplates object corresponding to the dataset name
+
+         :param dataset_name: name of the dataset to get
+         :param subset_name: name of the subset
+         """
+         # if the dataset does not exist, we add it
+         if (dataset_name, subset_name) not in self.keys:
+             self.datasets_templates[(dataset_name, subset_name)] = DatasetTemplates(dataset_name, subset_name)
+
+         return self.datasets_templates[(dataset_name, subset_name)]
+
+     def get_templates_count(self) -> Dict:
+         """
+         Return the overall template count for each dataset
+
+         NB: we don't break down datasets into subsets for the count, i.e., subset counts
+         are included in the dataset count
+         """
+
+         count_dict = defaultdict(int)
+         for k, v in self.datasets_templates.items():
+             # Subsets count towards dataset count
+             count_dict[k[0]] += len(v)
+         # converting to regular dict
+         return dict(count_dict)
+
+
+ class DatasetTemplates:
+     """
+     Class that wraps all templates for a specific dataset/subset and implements all the helper
+     functions necessary to read/write to the yaml file
+     """
+
+     TEMPLATES_KEY = "templates"
+     DATASET_KEY = "dataset"
+     SUBSET_KEY = "subset"
+     TEMPLATE_FILENAME = "templates.yaml"
+
+     def __init__(self, dataset_name: str, subset_name: str = None):
+         self.dataset_name: str = dataset_name
+         self.subset_name: str = subset_name
+         # dictionary is keyed by template name.
+         self.templates: Dict = self.read_from_file()
+
+         # Mapping from template name to template id
+         self.name_to_id_mapping = {}
+         self.sync_mapping()
+
+     def sync_mapping(self) -> None:
+         """
+         Re-compute the name_to_id_mapping to ensure it is in sync with self.templates
+         """
+         self.name_to_id_mapping = {template.name: template.id for template in self.templates.values()}
+
+     @property
+     def all_template_names(self) -> List[str]:
+         """
+         Sorted list of all template names for this dataset
+         """
+         return sorted([template.name for template in self.templates.values()])
+
+     @property
+     def folder_path(self) -> str:
+         if self.subset_name:
+             return os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name, self.subset_name)
+         else:
+             return os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name)
+
+     @property
+     def yaml_path(self) -> str:
+         return os.path.join(self.folder_path, self.TEMPLATE_FILENAME)
+
+     def format_for_dump(self) -> Dict:
+         """
+         Create a formatted dictionary for the class attributes
+         """
+         formatted_dict = {self.DATASET_KEY: self.dataset_name, self.TEMPLATES_KEY: self.templates}
+         if self.subset_name:
+             formatted_dict[self.SUBSET_KEY] = self.subset_name
+         return formatted_dict
+
+     def read_from_file(self) -> Dict:
+         """
+         Reads a file containing a prompt collection.
+         """
+
+         if not os.path.exists(self.yaml_path):
+             return {}
+         yaml_dict = yaml.load(open(self.yaml_path, "r"), Loader=yaml.FullLoader)
+         return yaml_dict[self.TEMPLATES_KEY]
+
+     def write_to_file(self) -> None:
+         """
+         Writes to a file with the current prompt collection.
+         """
+         # Sync the mapping
+         self.sync_mapping()
+
+         # We only create the folder if a template is written
+         if not os.path.exists(self.folder_path):
+             os.makedirs(self.folder_path)
+         yaml.dump(self.format_for_dump(), open(self.yaml_path, "w"))
+
+     def add_template(self, template: "Template") -> None:
+         """
+         Adds a new template for the dataset
+
+         :param template: template
+         """
+         self.templates[template.get_id()] = template
+
+         self.write_to_file()
+
+     def remove_template(self, template_name: str) -> None:
+         """
+         Deletes a template
+
+         :param template_name: name of template to remove
+         """
+
+         # Even if we have an ID, we want to check for duplicate names
+         if template_name not in self.all_template_names:
+             raise ValueError(f"No template with name {template_name} for dataset {self.dataset_name} exists.")
+
+         del self.templates[self.name_to_id_mapping[template_name]]
+
+         if len(self.templates) == 0:
+             # There is no remaining template, we can remove the entire folder
+             self.delete_folder()
+         else:
+             # We just update the file
+             self.write_to_file()
+
+     def update_template(
+         self,
+         current_template_name: str,
+         new_template_name: str,
+         jinja: str,
+         reference: str,
+         metadata: Template.Metadata,
+         answer_choices: str,
+     ) -> None:
+         """
+         Updates a pre-existing template and writes changes
+
+         :param current_template_name: current name of the template stored in self.templates
+         :param new_template_name: new name for the template
+         :param jinja: new jinja entry
+         :param reference: new reference entry
+         :param metadata: a Metadata object with template annotations
+         :param answer_choices: new answer_choices string
+         """
+         template_id = self.name_to_id_mapping[current_template_name]
+         self.templates[template_id].name = new_template_name
+         self.templates[template_id].jinja = jinja
+         self.templates[template_id].reference = reference
+         self.templates[template_id].metadata = metadata
+         self.templates[template_id].answer_choices = answer_choices
+
+         self.write_to_file()
+
+     def delete_folder(self) -> None:
+         """
+         Delete the folder corresponding to self.folder_path
+         """
+         self.sync_mapping()
+
+         rmtree(self.folder_path)
+
+         # If it is a subset, we have to check whether to remove the dataset folder
+         if self.subset_name:
+             # have to check for other folders
+             base_dataset_folder = os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name)
+             if len(os.listdir(base_dataset_folder)) == 0:
+                 rmtree(base_dataset_folder)
+
+     def __getitem__(self, template_key: str) -> "Template":
+         return self.templates[self.name_to_id_mapping[template_key]]
+
+     def __len__(self) -> int:
+         return len(self.templates)
+
+
+ def get_templates_data_frame():
+     """
+     Gathers all template information into a Pandas DataFrame.
+
+     :return: Pandas DataFrame
+     """
+     data = {
+         "id": [],
+         "dataset": [],
+         "subset": [],
+         "name": [],
+         "reference": [],
+         "original_task": [],
+         "choices_in_prompt": [],
+         "metrics": [],
+         "answer_choices": [],
+         "jinja": [],
+     }
+
+     template_collection = TemplateCollection()
+
+     for key in template_collection.keys:
+         templates = template_collection.get_dataset(key[0], key[1])
+         for template_name in templates.all_template_names:
+             template = templates[template_name]
+             data["id"].append(template.get_id())
+             data["dataset"].append(key[0])
+             data["subset"].append(key[1])
+             data["name"].append(template.get_name())
+             data["reference"].append(template.get_reference())
+             data["original_task"].append(template.metadata.original_task)
+             data["choices_in_prompt"].append(template.metadata.choices_in_prompt)
+             data["metrics"].append(template.metadata.metrics)
+             data["answer_choices"].append(template.get_answer_choices_expr())
+             data["jinja"].append(template.jinja)
+
+     return pd.DataFrame(data)
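[Editor's note] End to end, the read path of this module is small; a sketch (not part of the diff, using the ag_news templates added elsewhere in this commit):

import datasets

from promptsource.templates import DatasetTemplates

ag_news_prompts = DatasetTemplates("ag_news")
template = ag_news_prompts[ag_news_prompts.all_template_names[0]]
example = datasets.load_dataset("ag_news", split="train")[0]
inputs, target = template.apply(example)  # splits the rendered string on "|||"
print(inputs, "->", target)
print(template.get_answer_choices_list(example))

Note that apply returns a list, so the two-element unpacking above assumes the template contains exactly one "|||" separator.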
promptsource/templates/Zaid/coqa_expanded/templates.yaml ADDED
@@ -0,0 +1,116 @@
+ dataset: Zaid/coqa_expanded
+ templates:
+   12ad4331-d063-4b56-b0f6-76f59c690717: !Template
+     answer_choices: null
+     id: 12ad4331-d063-4b56-b0f6-76f59c690717
+     jinja: "Below is a passage, followed by a series of questions and answers about\
+       \ the passage. Answer the last question based on the information contained in\
+       \ the passage. If there is no answer in the passage, say \"unknown\".\n\nPassage:\
+       \ {{story}}\n\nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1\
+       \ %}\n{{answer[\"input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: Verbose instructions
+     reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
+   2f9fb20d-f4c9-4371-9cd4-db47607cb7a3: !Template
+     answer_choices: null
+     id: 2f9fb20d-f4c9-4371-9cd4-db47607cb7a3
+     jinja: "What is the answer to the last question in the dialogue below? If there\
+       \ is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\nQ:\
+       \ {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
+       input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: What is the answer
+     reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
+   9aff8967-d41c-4d79-8ef4-fc3650773735: !Template
+     answer_choices: null
+     id: 9aff8967-d41c-4d79-8ef4-fc3650773735
+     jinja: "Complete the dialogue based on the information contained in the passage.\
+       \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\
+       \nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
+       input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: Complete the dialogue
+     reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
+   9bc32f2e-eee6-4006-bce3-74a79403d33e: !Template
+     answer_choices: null
+     id: 9bc32f2e-eee6-4006-bce3-74a79403d33e
+     jinja: "Answer the last question based on the information contained in the passage.\
+       \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\
+       \nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
+       input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: Answer the last question
+     reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
+   bacb6534-e607-4afc-a412-ccfcd9fe38e2: !Template
+     answer_choices: null
+     id: bacb6534-e607-4afc-a412-ccfcd9fe38e2
+     jinja: 'In the passage below, extract the part which answers the last question.
+       If there is no answer in the passage, say "unknown".


+       Passage: {{story}}


+       Q: {{question}}

+       A: |||

+       {% if answer["answer_start"] != -1 %}

+       {{story[answer["answer_start"] : answer["answer_end"] ]}}

+       {% else %}

+       unknown

+       {% endif %}'
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Squad
+       original_task: false
+     name: extract_answer
+     reference: ''
+   be39974f-aa86-4076-b444-bd3c2732b17b: !Template
+     answer_choices: null
+     id: be39974f-aa86-4076-b444-bd3c2732b17b
+     jinja: "Help me complete the dialogue about this passage. If there is no answer\
+       \ in the passage, say \"unknown\".\n\nPassage: {{story}}\n\nQ: {{question}}\
+       \ \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"input_text\"\
+       ]}}\n{% else %}\nunknown\n{% endif %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: Help me
+     reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
+   d95440ce-d538-40f8-ae09-664e05852ca8: !Template
+     answer_choices: null
+     id: d95440ce-d538-40f8-ae09-664e05852ca8
+     jinja: "{{story}}\n\nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] !=\
+       \ -1 %}\n{{answer[\"input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: GPT-3 Style
+     reference: 'Brown et al. NeurIPS 2020. Metric: variant of SQuAD (Section 6.1 of
+       the paper)'
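[Editor's note] Each of these YAML files deserializes into the Template objects defined in promptsource/templates.py. An illustrative round trip, not part of the diff:

from promptsource.templates import DatasetTemplates

coqa = DatasetTemplates("Zaid/coqa_expanded")
print(coqa.all_template_names)
# ['Answer the last question', 'Complete the dialogue', 'GPT-3 Style', 'Help me',
#  'Verbose instructions', 'What is the answer', 'extract_answer']
gpt3_style = coqa["GPT-3 Style"]
print(gpt3_style.metadata.original_task)  # True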
promptsource/templates/Zaid/quac_expanded/templates.yaml ADDED
@@ -0,0 +1,79 @@
+ dataset: Zaid/quac_expanded
+ templates:
+   01d8c949-89a7-4a44-9a39-6cf2ac3e0a7b: !Template
+     answer_choices: null
+     id: 01d8c949-89a7-4a44-9a39-6cf2ac3e0a7b
+     jinja: "What is the answer to the last question in the dialogue below? If there\
+       \ is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\n\
+       Q: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: What is the answer
+     reference: 'Metric: F1'
+   1484c6e6-bf42-47ca-9ea7-c3c552a24de1: !Template
+     answer_choices: null
+     id: 1484c6e6-bf42-47ca-9ea7-c3c552a24de1
+     jinja: "{{context}}\n\nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: GPT-3 Style
+     reference: 'Brown et al. NeurIPS 2020. Metric: F1'
+   2bca0532-01a3-4a64-a228-a57ae0965719: !Template
+     answer_choices: null
+     id: 2bca0532-01a3-4a64-a228-a57ae0965719
+     jinja: "Below is a passage, followed by a series of questions and answers about\
+       \ the passage. Answer the last question based on the information contained in\
+       \ the passage. If there is no answer in the passage, say \"unknown\".\n\nPassage:\
+       \ {{context}}\n\nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: Verbose instructions
+     reference: 'Metric: F1'
+   4abd0379-dbc0-4f71-901b-dd0af3581157: !Template
+     answer_choices: null
+     id: 4abd0379-dbc0-4f71-901b-dd0af3581157
+     jinja: "Answer the last question based on the information contained in the passage.\
+       \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\
+       \nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: Answer the last question
+     reference: 'Metric: F1'
+   8ebbd098-b40c-4e69-8cbb-0ffecf0fe2a6: !Template
+     answer_choices: null
+     id: 8ebbd098-b40c-4e69-8cbb-0ffecf0fe2a6
+     jinja: "Complete the dialogue based on the information contained in the passage.\
+       \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\
+       \nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: Complete the dialogue
+     reference: 'Metric: F1'
+   e624695b-5d26-47cc-bdb4-ac2bee4ddaea: !Template
+     answer_choices: null
+     id: e624695b-5d26-47cc-bdb4-ac2bee4ddaea
+     jinja: "Help me complete the dialogue about this passage. If there is no answer\
+       \ in the passage, say \"unknown\".\n\nPassage: {{context}}\n\nQ: {{question}}\
+       \ \nA: ||| {{answer[\"texts\"][0]}}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: false
+       metrics:
+       - Other
+       original_task: true
+     name: Help me
+     reference: 'Metric: F1'
promptsource/templates/acronym_identification/templates.yaml ADDED
@@ -0,0 +1,219 @@
+ dataset: acronym_identification
+ templates:
+   64f438f2-9968-459f-82d2-24bad632b358: !Template
+     answer_choices: null
+     id: 64f438f2-9968-459f-82d2-24bad632b358
+     jinja: "{% set random_abbr = '' %}\n{% set _dummy = none %}\n{% set abbr_exp_dict\
+       \ = namespace(value = {}) %}\n{% set abbr_string=namespace(value='') %}\n{%\
+       \ set exp_string=namespace(value='')%}\n \n{% for label_idx in range(labels|length)\
+       \ %}\n {% if labels[label_idx] == 0 %}{# Long Beginning #}\n {% set exp_string.value\
+       \ = tokens[label_idx] %}{# Create new long string #}\n {% elif labels[label_idx]\
+       \ == 1 %}{# Short Beginning #}\n {% if abbr_string.value!='' and abbr_string.value\
+       \ not in abbr_exp_dict.value.keys()%}{# Some string already present #}\n \
+       \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''}) %}{#\
+       \ Discard this string as a new short string is coming #}\n {% endif %}\n\
+       \ {% set abbr_string.value = tokens[label_idx] %}{# Create new short string\
+       \ #}\n {% elif labels[label_idx] == 2 %}{# Long Intermediate #}\n {% set\
+       \ exp_string.value = exp_string.value+' '+tokens[label_idx] %}{# Update existing\
+       \ string #}\n {% elif labels[label_idx] == 3 %}{# Short Intermediate #}\n \
+       \ {% set abbr_string.value = abbr_string.value+tokens[label_idx] %}{# Update\
+       \ existing string #}\n {% else %}{# Other #}\n {# Both non-empty, and first\
+       \ characters match #}\n {% if abbr_string.value!='' and exp_string.value!=''\
+       \ and exp_string.value.split()[0][0]|lower in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower\
+       \ in abbr_string.value|lower%}\n {# Update both the dictionaries #}\n \
+       \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
+       \ %}\n {# Empty both the strings #}\n {% set abbr_string.value= ''\
+       \ %}\n {% set exp_string.value= '' %}\n {% endif %}\n {% endif %}\n\
+       {% endfor %}\n{# Both non-empty, and first characters match #}\n{% if abbr_string.value!=''\
+       \ and exp_string.value!='' %}\n {% if exp_string.value.split()[0][0]|lower\
+       \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
+       \ %}\n {# Update both the dictionaries #}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
+       \ %}\n {% elif abbr_exp_dict.value.items()|length==0 %}\n {% set _dummy\
+       \ = abbr_exp_dict.value.update({abbr_string.value:exp_string.value}) %}\n {%\
+       \ endif %}\n{% else %}\n {% if abbr_string.value!=''%}\n {% if abbr_string.value\
+       \ not in abbr_exp_dict.value.keys() %}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''})\
+       \ %}\n {% endif %}\n {% endif %}\n{% endif %}\n{% if abbr_exp_dict.value\
+       \ %}\n{% set random_abbr = abbr_exp_dict.value.keys()|list|choice %}\nGiven\
+       \ the following tokens, find the expansion of {{random_abbr}}. Return {{\"Unclear\"\
+       }} if the expansion can't be found.\n \n{{tokens|join(' ')}}\n|||\n{% if random_abbr\
+       \ in abbr_exp_dict.value.keys() and abbr_exp_dict.value[random_abbr]!='' %}\n\
+       {{abbr_exp_dict.value[random_abbr]}}\n{% else %}\nUnclear\n{% endif %}\n{% endif\
+       \ %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: false
+     name: find_expansion
+     reference: Given the tokens, find the expansion of an abbreviation in the tokens.
+   81babc83-18cd-4eed-a343-8ede56b21df5: !Template
+     answer_choices: null
+     id: 81babc83-18cd-4eed-a343-8ede56b21df5
+     jinja: "Given the BIO encoding as follows: \"{{\"B-short\"}}\" and \"{{\"I-short\"\
+       }}\" represent the beginning and intermediate tokens for abbreviations.\"{{\"\
+       B-long\"}}\" and \"{{\"I-long\"}}\" represent the beginning and intermediate\
+       \ tokens for expansions of the abbreviations. All other tokens are represented\
+       \ by \"{{\"O\"}}\". \nGenerate comma-separated BIO encoding for the following\
+       \ comma-separated tokens: \n\n{{tokens|join(', ')}}\n|||\n{% for label in labels\
+       \ %}{{[\"B-long\", \"B-short\", \"I-long\", \"I-short\", \"O\"][label]}}{%\
+       \ if not loop.last %},{%endif %}{% endfor %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: true
+     name: bio_encode
+     reference: Given the comma separated tokens, generate BIO encoding for abbreviations.
+   8832e5f7-7c45-46da-b85f-71fcb444f264: !Template
+     answer_choices: null
+     id: 8832e5f7-7c45-46da-b85f-71fcb444f264
+     jinja: 'List all the expansions of the acronyms present in the following comma-separated
+       tokens. Return {{"No expansions found"}} if the expansions can''t be found.

+       {{tokens|join('', '')}}

+       |||

+       {% set abbr_string=namespace(value='''') %}

+       {% set answer_list=namespace(value=[]) %}

+       {% for label_idx in range(labels|length) %}

+       {% if labels[label_idx] == 0 %}

+       {% set abbr_string.value = tokens[label_idx] %}

+       {% elif abbr_string.value!='''' and labels[label_idx]==2%}

+       {% set abbr_string.value = abbr_string.value+'' ''+tokens[label_idx] %}

+       {% elif abbr_string.value!='''' and labels[label_idx]!=2%}

+       {% set answer_list.value = answer_list.value +[abbr_string.value] %}

+       {% set abbr_string.value = '''' %}

+       {% endif %}

+       {% if loop.last and abbr_string.value!='''' %}

+       {% set answer_list.value = answer_list.value +[abbr_string.value] %}

+       {% endif %}

+       {% endfor %}

+       {% if answer_list.value|length!=0 %}

+       {{ answer_list.value|join('', '') }}

+       {% else %}

+       No expansions found.

+       {% endif %}'
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: false
+     name: list_expansions
+     reference: Given the tokens, list the expansion tokens.
+   cae58242-cde9-472d-ae9e-56fc7e79c0d1: !Template
+     answer_choices: null
+     id: cae58242-cde9-472d-ae9e-56fc7e79c0d1
+     jinja: "List all the acronyms in the following comma-separated tokens: \n\n{{tokens|join(',\
+       \ ')}}\n|||\n{% set abbr_string=namespace(value='') %}\n{% set answer_list=namespace(value=[])\
+       \ %}\n{% for label_idx in range(labels|length) %}\n{% if labels[label_idx] ==\
+       \ 1 %}\n{% set abbr_string.value = tokens[label_idx] %}\n{% elif abbr_string.value!=''\
+       \ and labels[label_idx]==3%}\n{% set abbr_string.value = abbr_string.value+tokens[label_idx]\
+       \ %}\n{% elif abbr_string.value!='' and labels[label_idx]!=3 %}\n{% set answer_list.value\
+       \ = answer_list.value +[abbr_string.value] %}\n{% set abbr_string.value = ''\
+       \ %}\n{% endif %}\n{% if loop.last and abbr_string.value!='' %}\n{% set answer_list.value\
+       \ = answer_list.value +[abbr_string.value] %}\n{% endif %}\n{% endfor %}\n{{\
+       \ answer_list.value|join(', ') }}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: false
+     name: list_abbreviations
+     reference: Given the tokens, list the abbreviations.
+   e4e42433-0e37-4aa5-bbce-7f336ecac6a3: !Template
+     answer_choices: null
+     id: e4e42433-0e37-4aa5-bbce-7f336ecac6a3
+     jinja: "{% set _dummy = none %}\n{% set abbr_exp_dict = namespace(value = {})\
+       \ %}\n{% set abbr_string=namespace(value='') %}\n{% set exp_string=namespace(value='')%}\n\
+       \ \n{% for label_idx in range(labels|length) %}\n {% if labels[label_idx] ==\
+       \ 0 %}{# Long Beginning #}\n {% set exp_string.value = tokens[label_idx]\
+       \ %}{# Create new long string #}\n {% elif labels[label_idx] == 1 %}{# Short\
+       \ Beginning #}\n {% if abbr_string.value!='' and abbr_string.value not in\
+       \ abbr_exp_dict.value.keys()%}{# Some string already present #}\n {% set\
+       \ _dummy = abbr_exp_dict.value.update({abbr_string.value:''}) %}{# Discard this\
+       \ string as a new short string is coming #}\n {% endif %}\n {% set abbr_string.value\
+       \ = tokens[label_idx] %}{# Create new short string #}\n {% elif labels[label_idx]\
+       \ == 2 %}{# Long Intermediate #}\n {% set exp_string.value = exp_string.value+'\
+       \ '+tokens[label_idx] %}{# Update existing string #}\n {% elif labels[label_idx]\
+       \ == 3 %}{# Short Intermediate #}\n {% set abbr_string.value = abbr_string.value+tokens[label_idx]\
+       \ %}{# Update existing string #}\n {% else %}{# Other #}\n {# Both non-empty,\
+       \ and first characters match #}\n {% if abbr_string.value!='' and exp_string.value!=''\
+       \ and exp_string.value.split()[0][0]|lower in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower\
+       \ in abbr_string.value|lower%}\n {# Update both the dictionaries #}\n \
+       \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
+       \ %}\n {# Empty both the strings #}\n {% set abbr_string.value= ''\
+       \ %}\n {% set exp_string.value= '' %}\n {% endif %}\n {% endif %}\n\
+       {% endfor %}\n{# Both non-empty, and first characters match #}\n{% if abbr_string.value!=''\
+       \ and exp_string.value!='' %}\n {% if exp_string.value.split()[0][0]|lower\
+       \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
+       \ %}\n {# Update both the dictionaries #}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
+       \ %}\n {% elif abbr_exp_dict.value.items()|length==0 %}\n {% set _dummy\
+       \ = abbr_exp_dict.value.update({abbr_string.value:exp_string.value}) %}\n {%\
+       \ endif %}\n{% else %}\n {% if abbr_string.value!=''%}\n {% if abbr_string.value\
+       \ not in abbr_exp_dict.value.keys() %}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''})\
+       \ %}\n {% endif %}\n {% endif %}\n{% endif %}\n \nGiven the following tokens,\
+       \ find the abbreviations and their expansions. Return {{\"Unclear\"}} if the\
+       \ expansion can't be found.\n \n{{tokens|join(' ')}}\n|||\n{% for item, value\
+       \ in abbr_exp_dict.value.items() %}\n{{item}} : {% if value!='' %}{{value}}{%\
+       \ else %}Unclear{% endif %}\n{%endfor%}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: false
+     name: find_mapping
+     reference: Given the tokens, find the abbreviation mapping.
+   eed32ee4-ebc3-499f-ba61-e91461f56ccb: !Template
+     answer_choices: null
+     id: eed32ee4-ebc3-499f-ba61-e91461f56ccb
+     jinja: "{% set random_exp = '' %}{% set _dummy = none %}{% set exp_abbr_dict =\
+       \ namespace(value = {}) %}{% set abbr_string=namespace(value='') %}{% set exp_string=namespace(value='')%}{%\
+       \ for label_idx in range(labels|length) %}{% if labels[label_idx] == 0 %}{#\
+       \ Long Beginning #}{% if exp_string.value!='' and exp_string.value not in exp_abbr_dict.value.keys()\
+       \ %}{# Some string already present #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:''})\
+       \ %}{# Discard this string as a new long string is coming #} {% endif %}{% set\
+       \ exp_string.value = tokens[label_idx] %}{# Create new long string #}{% elif\
+       \ labels[label_idx] == 1 %}{# Short Beginning #}{% set abbr_string.value = tokens[label_idx]\
+       \ %}{# Create new short string #}{% elif labels[label_idx] == 2 %}{# Long Intermediate\
+       \ #}{% set exp_string.value = exp_string.value+' '+tokens[label_idx] %}{# Update\
+       \ existing string #}{% elif labels[label_idx] == 3 %}{# Short Intermediate #}{%\
+       \ set abbr_string.value = abbr_string.value+tokens[label_idx] %}{# Update existing\
+       \ string #}{% else %}{# Other #}{# Both non-empty, and first characters match\
+       \ #}{% if abbr_string.value!='' and exp_string.value!='' and exp_string.value.split()[0][0]|lower\
+       \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower%}{#\
+       \ Update both the dictionaries #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
+       \ %}{# Empty both the strings #}{% set abbr_string.value= '' %}{% set exp_string.value=\
+       \ '' %}{% endif %}{% endif %}{% endfor %}{# Both non-empty, and first characters\
+       \ match #}{% if abbr_string.value!='' and exp_string.value!='' %}{% if exp_string.value.split()[0][0]|lower\
+       \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
+       \ %}{# Update the dictionary #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
+       \ %}{% elif exp_abbr_dict.value.items()|length==0 %}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
+       \ %}{% endif %}{% else %}{% if exp_string.value!='' %}{% if exp_string.value\
+       \ not in exp_abbr_dict.value.keys() %}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:''})\
+       \ %}{% endif %}{% endif %}{% endif %}{% if exp_abbr_dict.value.items()|length!=0\
+       \ %}{% set random_exp = exp_abbr_dict.value.keys()|list|choice %}Given the following\
+       \ tokens, find the abbreviation for: {{random_exp}}. Return \"Unclear\" if the\
+       \ abbreviation can't be found.\n \n{{tokens|join(' ')}}|||{% if random_exp in\
+       \ exp_abbr_dict.value.keys() and exp_abbr_dict.value[random_exp]!='' %}{{exp_abbr_dict.value[random_exp]}}{%\
+       \ else %}Unclear{% endif %}{% endif %}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: false
+     name: find_abbreviation
+     reference: Given the tokens, find the abbreviation for an expansion.
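[Editor's note] For reference, the integer labels these templates iterate over follow the dataset's BIO scheme, exactly as enumerated in the bio_encode template above. A tiny decoding sketch with made-up labels:

# 0: B-long, 1: B-short, 2: I-long, 3: I-short, 4: O
BIO_TAGS = ["B-long", "B-short", "I-long", "I-short", "O"]

labels = [4, 1, 4, 0, 2, 4]  # hypothetical example labels
print([BIO_TAGS[label] for label in labels])
# ['O', 'B-short', 'O', 'B-long', 'I-long', 'O']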
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_classification/templates.yaml ADDED
@@ -0,0 +1,39 @@
+ dataset: ade_corpus_v2
+ subset: Ade_corpus_v2_classification
+ templates:
+   56bd12a8-b8ee-464e-98cc-5f586ba9f74d: !Template
+     answer_choices: Not-Related ||| Related
+     id: 56bd12a8-b8ee-464e-98cc-5f586ba9f74d
+     jinja: Is "{{text}}" related to adverse drug effect (ADE)? ||| {{answer_choices[label]}}
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: true
+     name: baseline
+     reference: ''
+   78c4ce65-dd66-46ed-878d-11f4eca5e544: !Template
+     answer_choices: Yes, it is related to adverse drug effect. ||| No, it is not related
+       to adverse drug effect.
+     id: 78c4ce65-dd66-46ed-878d-11f4eca5e544
+     jinja: "Read the below text and answer the question.\n\nText: {{text}} \n\nQuestion:\
+       \ Is the above text related to adverse drug effect (ADE)?\n\nA. Yes, it is related\
+       \ to adverse drug effect.\n\nB. No, it is not related to adverse drug effect.\n\
+       |||\n{{answer_choices[label]}}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: null
+     name: verbose
+     reference: ''
+   dabc0337-5bd3-4150-98b3-794a15ce1a3a: !Template
+     answer_choices: null
+     id: dabc0337-5bd3-4150-98b3-794a15ce1a3a
+     jinja: "{% if label==1 %}\nWrite a medical report that is related to adverse drug\
+       \ effect (ADE). \n{% else %}\nWrite a medical report that is not related to\
+       \ adverse drug effect (ADE). \n{% endif %}\n|||\n{{text}}"
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: null
+     name: label-to-text
+     reference: ''
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/templates.yaml ADDED
@@ -0,0 +1,89 @@
+ dataset: ade_corpus_v2
+ subset: Ade_corpus_v2_drug_ade_relation
+ templates:
+   0ec35408-652d-4ebc-9478-5a0d330c24c8: !Template
+     answer_choices: null
+     id: 0ec35408-652d-4ebc-9478-5a0d330c24c8
+     jinja: 'What drug has an effect of {{effect}}?

+       |||

+       {{drug}}'
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: null
+     name: effect2drug
+     reference: ''
+   2682a789-a435-4976-b34f-f376991c842a: !Template
+     answer_choices: null
+     id: 2682a789-a435-4976-b34f-f376991c842a
+     jinja: '{{drug}} has an effect of {{effect}}. Create a sentence using this drug
+       and its effect.

+       |||

+       {{text}}'
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: null
+     name: drug-and-effect-to-text
+     reference: ''
+   61ba3622-72bc-4fd8-acfc-826bc2a93aa5: !Template
+     answer_choices: null
+     id: 61ba3622-72bc-4fd8-acfc-826bc2a93aa5
+     jinja: 'What effect does {{drug}} have?

+       |||

+       {{effect}}'
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: null
+     name: drug2effect
+     reference: ''
+   6acf3588-baa1-4ff6-87c4-4c2356855464: !Template
+     answer_choices: null
+     id: 6acf3588-baa1-4ff6-87c4-4c2356855464
+     jinja: 'Read the below text and answer the question.


+       Text: {{text}}


+       Question: What are the drug and its effect of the above text, respectively?

+       |||

+       {{drug}} and {{effect}}, respectively.'
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: true
+     name: baseline
+     reference: ''
+   db68e609-ba92-40ae-b161-8b7710124142: !Template
+     answer_choices: null
+     id: db68e609-ba92-40ae-b161-8b7710124142
+     jinja: 'Read the below text and answer the two following questions.


+       Text: {{text}}


+       Question 1: What is the drug in the above text?


+       Question 2: What is the effect of it?

+       |||

+       The drug is {{drug}} and its effect is {{effect}}.'
+     metadata: !TemplateMetadata
+       choices_in_prompt: null
+       metrics: []
+       original_task: null
+     name: two-questions
+     reference: ''
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_dosage_relation/templates.yaml ADDED
@@ -0,0 +1,82 @@
+ dataset: ade_corpus_v2
+ subset: Ade_corpus_v2_drug_dosage_relation
+ templates:
+ 1de6d411-ed0a-4d48-806e-cad009f07a65: !Template
+ answer_choices: null
+ id: 1de6d411-ed0a-4d48-806e-cad009f07a65
+ jinja: 'What drug has a dosage of {{dosage}}?
+
+ |||
+
+ {{drug}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: dosage2drug
+ reference: ''
+ 1e719388-59c9-4b0a-9ed9-dd02b6ddd0a6: !Template
+ answer_choices: null
+ id: 1e719388-59c9-4b0a-9ed9-dd02b6ddd0a6
+ jinja: '{{dosage}} of {{drug}} was given to a patient. What kind of symptom did
+ this patient have?
+
+ |||
+
+ {{text}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: drug-and-dosage-to-text
+ reference: ''
+ 2bed0f04-8249-4248-86ea-e3a1971b2e1b: !Template
+ answer_choices: null
+ id: 2bed0f04-8249-4248-86ea-e3a1971b2e1b
+ jinja: 'Read the below text and answer the two following questions.
+
+
+ Text: {{text}}
+
+
+
+ Question 1: What is the drug in the above text?
+
+
+ Question 2: What is the dosage of it?
+
+ |||
+
+ The drug is {{drug}} and its dosage is {{dosage}}.'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: two-questions
+ reference: ''
+ ca175bed-d046-40e7-9dbb-1e50fde7e603: !Template
+ answer_choices: null
+ id: ca175bed-d046-40e7-9dbb-1e50fde7e603
+ jinja: 'What is a possible dosage of {{drug}}?
+
+ |||
+
+ {{dosage}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: drug2dosage
+ reference: ''
+ ce5208ac-6b4c-4a35-8738-e20232df1917: !Template
+ answer_choices: null
+ id: ce5208ac-6b4c-4a35-8738-e20232df1917
+ jinja: "Read the below text and answer the question.\n\nText: {{text}}\n\nQuestion:\
+ \ What are the drug and its dosage of the above text, respectively? \n|||\n\
+ {{drug}} and {{dosage}}, respectively."
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: baseline
+ reference: ''
promptsource/templates/adversarial_qa/adversarialQA/templates.yaml ADDED
@@ -0,0 +1,110 @@
+ dataset: adversarial_qa
+ subset: adversarialQA
+ templates:
+ 00755780-f3c0-44b4-b159-8f3873cdb16c: !Template
+ answer_choices: null
+ id: 00755780-f3c0-44b4-b159-8f3873cdb16c
+ jinja: 'I want to test the ability of students to read a passage and answer questions
+ about it. Could you please come up with a good question for the passage "{{context}}"?
+ |||
+
+ {{question}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - BLEU
+ - ROUGE
+ original_task: false
+ name: generate_question
+ reference: 'Input: Context, Output: Question (generate a question)'
+ 3b2459cc-6600-443c-abf8-8f60c34cd998: !Template
+ answer_choices: null
+ id: 3b2459cc-6600-443c-abf8-8f60c34cd998
+ jinja: '{% if metadata.split != "test" %}
+
+ I know that the answer to the question "{{question}}" is in "{{context}}". Can
+ you tell me what it is? |||
+
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: tell_what_it_is
+ reference: 'Input: QC, Output: A (rephrase)'
+ 5bdb1815-5c6f-49a3-ad1d-367344420701: !Template
+ answer_choices: null
+ id: 5bdb1815-5c6f-49a3-ad1d-367344420701
+ jinja: '{% if metadata.split != "test" %}
+
+ Question: "{{question}}"
+
+
+ Context: "{{context}}"
+
+
+ Answer:
+
+ |||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: question_context_answer
+ reference: 'Input: QC, Output: Answer (short form)'
+ a0872cde-2f19-4ae6-919a-868da47bfbcb: !Template
+ answer_choices: null
+ id: a0872cde-2f19-4ae6-919a-868da47bfbcb
+ jinja: '{% if metadata.split != "test" %}
+
+ Extract the answer to the question from the following context.
+
+ Question: {{question}}
+
+ Context: {{context}}|||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: based_on
+ reference: ''
+ a64d5a15-68e2-4d1c-b30a-ca8250c860f9: !Template
+ answer_choices: null
+ id: a64d5a15-68e2-4d1c-b30a-ca8250c860f9
+ jinja: '{% if metadata.split != "test" %}
+
+ Given the following passage
+
+
+ "{{context}}",
+
+
+ answer the following question. Note that the answer is present within the text.
+
+
+ Question: {{question}} |||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: answer_the_following_q
+ reference: 'Input: QC, Output: Answer'
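Each answer-extraction template here guards on metadata.split because the adversarial_qa test split ships without gold answers; when the guard fails the template renders to an empty string and the example is skipped. A rough sketch of that behavior under the same custom "choice" filter assumption as above, with a hypothetical record:

    import random
    from jinja2 import Environment

    env = Environment()
    env.filters["choice"] = random.choice  # promptsource-style filter

    t = env.from_string(
        '{% if metadata.split != "test" %}'
        "Question: {{question}}\nContext: {{context}}\nAnswer:"
        "|||{{answers.text | choice}}"
        "{% endif %}"
    )

    train_ex = {  # hypothetical example, not from the real dataset
        "metadata": {"split": "train"},
        "question": "Who wrote the passage?",
        "context": "The passage was written by Ada.",
        "answers": {"text": ["Ada"]},
    }
    test_ex = {**train_ex, "metadata": {"split": "test"}, "answers": {"text": []}}

    print(t.render(**train_ex))  # prompt ||| "Ada"
    print(t.render(**test_ex))   # empty string -> example is dropped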
promptsource/templates/adversarial_qa/dbert/templates.yaml ADDED
@@ -0,0 +1,110 @@
+ dataset: adversarial_qa
+ subset: dbert
+ templates:
+ 00755780-f3c0-44b4-b159-8f3873cdb16a: !Template
+ answer_choices: null
+ id: 00755780-f3c0-44b4-b159-8f3873cdb16a
+ jinja: 'I want to test the ability of students to read a passage and answer questions
+ about it. Could you please come up with a good question for the passage "{{context}}"?
+ |||
+
+ {{question}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - BLEU
+ - ROUGE
+ original_task: false
+ name: generate_question
+ reference: 'Input: Context, Output: Question (generate a question)'
+ 3b2459cc-6600-443c-abf8-8f60c34cd99a: !Template
+ answer_choices: null
+ id: 3b2459cc-6600-443c-abf8-8f60c34cd99a
+ jinja: '{% if metadata.split != "test" %}
+
+ I know that the answer to the question "{{question}}" is in "{{context}}". Can
+ you tell me what it is? |||
+
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: tell_what_it_is
+ reference: 'Input: QC, Output: A (rephrase)'
+ 5bdb1815-5c6f-49a3-ad1d-36734442070a: !Template
+ answer_choices: null
+ id: 5bdb1815-5c6f-49a3-ad1d-36734442070a
+ jinja: '{% if metadata.split != "test" %}
+
+ Question: "{{question}}"
+
+
+ Context: "{{context}}"
+
+
+ Answer:
+
+ |||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: question_context_answer
+ reference: 'Input: QC, Output: Answer (short form)'
+ a0872cde-2f19-4ae6-919a-868da47bfbca: !Template
+ answer_choices: null
+ id: a0872cde-2f19-4ae6-919a-868da47bfbca
+ jinja: '{% if metadata.split != "test" %}
+
+ Extract the answer to the question from the following context.
+
+ Question: {{question}}
+
+ Context: {{context}}|||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: based_on
+ reference: ''
+ a64d5a15-68e2-4d1c-b30a-ca8250c860fa: !Template
+ answer_choices: null
+ id: a64d5a15-68e2-4d1c-b30a-ca8250c860fa
+ jinja: '{% if metadata.split != "test" %}
+
+ Given the following passage
+
+
+ "{{context}}",
+
+
+ answer the following question. Note that the answer is present within the text.
+
+
+ Question: {{question}} |||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: answer_the_following_q
+ reference: 'Input: QC, Output: Answer'
promptsource/templates/adversarial_qa/dbidaf/templates.yaml ADDED
@@ -0,0 +1,110 @@
+ dataset: adversarial_qa
+ subset: dbidaf
+ templates:
+ 41f28b31-d0fc-4f20-a0a2-ff21813e298e: !Template
+ answer_choices: null
+ id: 41f28b31-d0fc-4f20-a0a2-ff21813e298e
+ jinja: '{% if metadata.split != "test" %}
+
+ Extract the answer to the question from the following context.
+
+ Question: {{question}}
+
+ Context: {{context}}|||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: based_on
+ reference: ''
+ a64d5a15-68e2-4d1c-b30a-ca8250c860d9: !Template
+ answer_choices: null
+ id: a64d5a15-68e2-4d1c-b30a-ca8250c860d9
+ jinja: '{% if metadata.split != "test" %}
+
+ Given the following passage
+
+
+ "{{context}}",
+
+
+ answer the following question. Note that the answer is present within the text.
+
+
+ Question: {{question}} |||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: answer_the_following_q
+ reference: 'Input: QC, Output: Answer'
+ c7a80603-d610-4999-98a7-815b2f84592d: !Template
+ answer_choices: null
+ id: c7a80603-d610-4999-98a7-815b2f84592d
+ jinja: 'I want to test the ability of students to read a passage and answer questions
+ about it. Could you please come up with a good question for the passage "{{context}}"?
+ |||
+
+ {{question}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - BLEU
+ - ROUGE
+ original_task: false
+ name: generate_question
+ reference: 'Input: Context, Output: Question (generate a question)'
+ ce9bc00a-567b-4c4e-aad7-df6f5d5d57bb: !Template
+ answer_choices: null
+ id: ce9bc00a-567b-4c4e-aad7-df6f5d5d57bb
+ jinja: '{% if metadata.split != "test" %}
+
+ I know that the answer to the question "{{question}}" is in "{{context}}". Can
+ you tell me what it is? |||
+
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: tell_what_it_is
+ reference: 'Input: QC, Output: A (rephrase)'
+ fa185424-6ebe-49b8-b4ed-7632ca33c361: !Template
+ answer_choices: null
+ id: fa185424-6ebe-49b8-b4ed-7632ca33c361
+ jinja: '{% if metadata.split != "test" %}
+
+ Question: "{{question}}"
+
+
+ Context: "{{context}}"
+
+
+ Answer:
+
+ |||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: question_context_answer
+ reference: 'Input: QC, Output: Answer (short form)'
promptsource/templates/adversarial_qa/droberta/templates.yaml ADDED
@@ -0,0 +1,110 @@
+ dataset: adversarial_qa
+ subset: droberta
+ templates:
+ 00755780-f3c0-44b4-b159-8f3873cdb163: !Template
+ answer_choices: null
+ id: 00755780-f3c0-44b4-b159-8f3873cdb163
+ jinja: 'I want to test the ability of students to read a passage and answer questions
+ about it. Could you please come up with a good question for the passage "{{context}}"?
+ |||
+
+ {{question}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - BLEU
+ - ROUGE
+ original_task: false
+ name: generate_question
+ reference: 'Input: Context, Output: Question (generate a question)'
+ 3b2459cc-6600-443c-abf8-8f60c34cd993: !Template
+ answer_choices: null
+ id: 3b2459cc-6600-443c-abf8-8f60c34cd993
+ jinja: '{% if metadata.split != "test" %}
+
+ I know that the answer to the question "{{question}}" is in "{{context}}". Can
+ you tell me what it is? |||
+
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: tell_what_it_is
+ reference: 'Input: QC, Output: A (rephrase)'
+ 5bdb1815-5c6f-49a3-ad1d-367344420703: !Template
+ answer_choices: null
+ id: 5bdb1815-5c6f-49a3-ad1d-367344420703
+ jinja: '{% if metadata.split != "test" %}
+
+ Question: "{{question}}"
+
+
+ Context: "{{context}}"
+
+
+ Answer:
+
+ |||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: question_context_answer
+ reference: 'Input: QC, Output: Answer (short form)'
+ a0872cde-2f19-4ae6-919a-868da47bfbc3: !Template
+ answer_choices: null
+ id: a0872cde-2f19-4ae6-919a-868da47bfbc3
+ jinja: '{% if metadata.split != "test" %}
+
+ Extract the answer to the question from the following context.
+
+ Question: {{question}}
+
+ Context: {{context}}|||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: based_on
+ reference: ''
+ a64d5a15-68e2-4d1c-b30a-ca8250c860f3: !Template
+ answer_choices: null
+ id: a64d5a15-68e2-4d1c-b30a-ca8250c860f3
+ jinja: '{% if metadata.split != "test" %}
+
+ Given the following passage
+
+
+ "{{context}}",
+
+
+ answer the following question. Note that the answer is present within the text.
+
+
+ Question: {{question}} |||
+
+ {{answers.text | choice}}
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Squad
+ original_task: true
+ name: answer_the_following_q
+ reference: 'Input: QC, Output: Answer'
promptsource/templates/aeslc/templates.yaml ADDED
@@ -0,0 +1,131 @@
+ dataset: aeslc
+ templates:
+ 0bef38b8-6d0b-440b-8a3d-db034aaf5a15: !Template
+ answer_choices: null
+ id: 0bef38b8-6d0b-440b-8a3d-db034aaf5a15
+ jinja: '{{ email_body }}
+
+
+ What is this email about? |||
+
+
+ {{ subject_line }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: context_question_2
+ reference: ''
+ 11de8b2c-8016-4b98-b5f2-c1a7e5c0e433: !Template
+ answer_choices: null
+ id: 11de8b2c-8016-4b98-b5f2-c1a7e5c0e433
+ jinja: 'What is the subject of this email:
+
+
+ {{ email_body }} |||
+
+
+ {{ subject_line }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: question_context_1
+ reference: ''
+ 12616e45-1d61-4924-8ce4-fe3efd061e7a: !Template
+ answer_choices: null
+ id: 12616e45-1d61-4924-8ce4-fe3efd061e7a
+ jinja: 'The text below is the content of an email. What is the topic of this email?
+
+
+ {{ email_body }} |||
+
+
+ {{ subject_line }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: question_context_4
+ reference: ''
+ 25179c66-5638-4de5-bdce-d6dccec64c65: !Template
+ answer_choices: null
+ id: 25179c66-5638-4de5-bdce-d6dccec64c65
+ jinja: 'Choose a subject line for the email body below:
+
+
+ {{ email_body }} |||
+
+
+ {{ subject_line }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: question_context_3
+ reference: ''
+ 8917d7f0-5f72-418f-a2d9-98d4a8da13b0: !Template
+ answer_choices: null
+ id: 8917d7f0-5f72-418f-a2d9-98d4a8da13b0
+ jinja: 'What is this email about:
+
+
+ {{ email_body }} |||
+
+
+ {{ subject_line }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: question_context_2
+ reference: ''
+ d1c5da3f-f1e4-4891-abcb-79463b30a616: !Template
+ answer_choices: null
+ id: d1c5da3f-f1e4-4891-abcb-79463b30a616
+ jinja: '{{ email_body }}
+
+
+ What is the subject of this email? |||
+
+
+ {{ subject_line }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: context_question_1
+ reference: ''
+ d9dd8e72-acb4-4aad-aeb7-a877bacbb402: !Template
+ answer_choices: null
+ id: d9dd8e72-acb4-4aad-aeb7-a877bacbb402
+ jinja: '{{ email_body }}
+
+
+ Choose a subject line for the email body above. |||
+
+
+ {{ subject_line }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: context_question_3
+ reference: ''
+ dca29ebb-2372-423f-b93c-21d99eddf455: !Template
+ answer_choices: null
+ id: dca29ebb-2372-423f-b93c-21d99eddf455
+ jinja: '{{ email_body }}
+
+
+ The above text is the content of an email. What is the topic of this email?
+ |||
+
+
+ {{ subject_line }} '
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: context_question_4
+ reference: ''
promptsource/templates/ag_news/templates.yaml ADDED
@@ -0,0 +1,94 @@
+ dataset: ag_news
+ templates:
+ 24e44a81-a18a-42dd-a71c-5b31b2d2cb39: !Template
+ answer_choices: World politics ||| Sports ||| Business ||| Science and technology
+ id: 24e44a81-a18a-42dd-a71c-5b31b2d2cb39
+ jinja: "What label best describes this news article?\n{{text}} ||| \n{{answer_choices[label]\
+ \ }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ original_task: true
+ name: classify_question_first
+ reference: ''
+ 8fdc1056-1029-41a1-9c67-354fc2b8ceaf: !Template
+ answer_choices: World politics ||| Sports ||| Business ||| Science and technology
+ id: 8fdc1056-1029-41a1-9c67-354fc2b8ceaf
+ jinja: "Is this a piece of news regarding {{\"world politics, sports, business,\
+ \ or science and technology\"}}?\n{{text}} \n||| \n{{answer_choices[label] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: classify_with_choices_question_first
+ reference: ''
+ 918267e0-af68-4117-892d-2dbe66a58ce9: !Template
+ answer_choices: Politician ||| Athlete ||| Business executive ||| Scientist
+ id: 918267e0-af68-4117-892d-2dbe66a58ce9
+ jinja: 'Would you recommend the following article to a {{"politician"}}, an {{"athlete"}},
+ a {{"business executive"}}, or a {{"scientist"}}?
+
+
+ {{ text }}
+
+ |||
+
+ {{answer_choices[label]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: recommend
+ reference: ''
+ 9345df33-4f23-4944-a33c-eef94e626862: !Template
+ answer_choices: World News ||| Sports ||| Business ||| Science and Technology
+ id: 9345df33-4f23-4944-a33c-eef94e626862
+ jinja: "{{text}} \n\nWhich of the following sections of a newspaper would this\
+ \ article likely appear in? {{\"World News\"}}, {{\"Sports\"}}, {{\"Business\"\
+ }}, or {{\"Science and Technology\"}}? ||| \n{{answer_choices[label] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: which_section_choices
+ reference: ''
+ 98534347-fff7-4c39-a795-4e69a44791f7: !Template
+ answer_choices: World News ||| Sports ||| Business ||| Science and Technology
+ id: 98534347-fff7-4c39-a795-4e69a44791f7
+ jinja: "{{text}} \n\nWhich section of a newspaper would this article likely appear\
+ \ in? ||| \n{{answer_choices[label] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ original_task: true
+ name: which_section
+ reference: ''
+ b401b0ee-6ffe-4a91-8e15-77ee073cd858: !Template
+ answer_choices: World politics ||| Sports ||| Business ||| Science and technology
+ id: b401b0ee-6ffe-4a91-8e15-77ee073cd858
+ jinja: "{{text}} \nIs this a piece of news regarding {{\"world politics, sports,\
+ \ business, or science and technology\"}}? ||| \n{{answer_choices[label] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: classify_with_choices
+ reference: ''
+ cb355f33-7e8c-4455-a72b-48d315bd4f60: !Template
+ answer_choices: World politics ||| Sports ||| Business ||| Science and technology
+ id: cb355f33-7e8c-4455-a72b-48d315bd4f60
+ jinja: "{{text}} \nWhat label best describes this news article? ||| \n{{answer_choices[label]\
+ \ }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ original_task: true
+ name: classify
+ reference: ''
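Every ag_news prompt above targets answer_choices[label]: the "|||"-separated answer_choices string is parsed into a list and indexed by the integer class label. A minimal sketch of that mapping, assuming the split-and-strip behavior of promptsource's answer-choice parsing:

    choices = "World politics ||| Sports ||| Business ||| Science and technology"
    answer_choices = [c.strip() for c in choices.split("|||")]

    label = 2  # hypothetical ag_news class label
    assert answer_choices[label] == "Business"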
promptsource/templates/ai2_arc/ARC-Challenge/templates.yaml ADDED
@@ -0,0 +1,130 @@
+ dataset: ai2_arc
+ subset: ARC-Challenge
+ templates:
+ 32f7eb4d-dd38-4503-b67d-a8a96ab40449: !Template
+ answer_choices: null
+ id: 32f7eb4d-dd38-4503-b67d-a8a96ab40449
+ jinja: 'Pick and copy all the incorrect options for the following question:
+
+
+ {{question}}
+
+
+ Options:
+
+ - {{choices["text"] | join("\n- ")}}|||
+
+ {% for i in range(choices["label"]|length) %}
+
+ {% if i != choices["label"].index(answerKey) %}
+
+ - {{choices["text"][i]}}
+
+ {% endif %}
+
+ {% endfor %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ - Other
+ original_task: false
+ name: pick_false_options
+ reference: ''
+ 540ebc31-2ea6-4feb-a6fd-67b6e71cf20a: !Template
+ answer_choices: A ||| B ||| C ||| D
+ id: 540ebc31-2ea6-4feb-a6fd-67b6e71cf20a
+ jinja: "Here's a problem to solve: {{question}}\n\nAmong the 4 following options,\
+ \ which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text)\
+ \ %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: heres_a_problem
+ reference: ''
+ 5ec2b8ca-e4c0-444e-b097-89ccce811550: !Template
+ answer_choices: '{{choices.text | join("|||")}}'
+ id: 5ec2b8ca-e4c0-444e-b097-89ccce811550
+ jinja: '{{question}}
+
+
+ Options:
+
+ - {{answer_choices | join("\n- ")}}|||
+
+ {{answer_choices[choices["label"].index(answerKey)]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: qa_options
+ reference: ''
+ 5ff84886-9d5f-40d1-80d7-2a39b7c16ec6: !Template
+ answer_choices: '{{choices.text | join("|||")}}'
+ id: 5ff84886-9d5f-40d1-80d7-2a39b7c16ec6
+ jinja: 'I am hesitating between 4 options to answer the following question, which
+ option should I choose?
+
+ Question: {{question}}
+
+ Possibilities:
+
+ - {{answer_choices | join("\n- ")}}|||
+
+ {{answer_choices[choices["label"].index(answerKey)]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: i_am_hesitating
+ reference: ''
+ ced2b33b-b590-4522-b041-51d7dd669561: !Template
+ answer_choices: '{{choices.text | join("|||")}}'
+ id: ced2b33b-b590-4522-b041-51d7dd669561
+ jinja: 'I gave my students this multiple choice question: {{question}}
+
+
+ Only one answer is correct among these 4 choices:
+
+ - {{answer_choices | join("\n- ")}}
+
+
+ Could you tell me which one is correct?|||
+
+ {{answer_choices[choices["label"].index(answerKey)]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: multiple_choice
+ reference: ''
+ e371fc1a-8edb-477b-b345-9d73e97ffade: !Template
+ answer_choices: A ||| B ||| C ||| D
+ id: e371fc1a-8edb-477b-b345-9d73e97ffade
+ jinja: 'Pick the most correct option to answer the following question.
+
+
+ {{question}}
+
+
+ Options:
+
+ {% for letter, t in zip(answer_choices, choices.text) %}
+
+ - {{letter}}: {{t}}
+
+ {% endfor %} |||
+
+ {{answerKey}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: pick_the_most_correct_option
+ reference: ''
promptsource/templates/ai2_arc/ARC-Easy/templates.yaml ADDED
@@ -0,0 +1,130 @@
+ dataset: ai2_arc
+ subset: ARC-Easy
+ templates:
+ 033498ca-3d9a-47e3-b631-d881ab53b5ad: !Template
+ answer_choices: A ||| B ||| C ||| D
+ id: 033498ca-3d9a-47e3-b631-d881ab53b5ad
+ jinja: 'Pick the most correct option to answer the following question.
+
+
+ {{question}}
+
+
+ Options:
+
+ {% for letter, t in zip(answer_choices, choices.text) %}
+
+ - {{letter}}: {{t}}
+
+ {% endfor %} |||
+
+ {{answerKey}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: pick_the_most_correct_option
+ reference: ''
+ 252aa566-9482-4e81-aad9-664a9bebd8e8: !Template
+ answer_choices: '{{choices.text | join("|||")}}'
+ id: 252aa566-9482-4e81-aad9-664a9bebd8e8
+ jinja: '{{question}}
+
+
+ Options:
+
+ - {{answer_choices | join("\n- ")}}|||
+
+ {{answer_choices[choices["label"].index(answerKey)]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: qa_options
+ reference: ''
+ 4fb13ac1-f770-45ea-b5d5-91ac50b0d609: !Template
+ answer_choices: '{{choices.text | join("|||")}}'
+ id: 4fb13ac1-f770-45ea-b5d5-91ac50b0d609
+ jinja: 'I am hesitating between 4 options to answer the following question, which
+ option should I choose?
+
+ Question: {{question}}
+
+ Possibilities:
+
+ - {{answer_choices | join("\n- ")}}|||
+
+ {{answer_choices[choices["label"].index(answerKey)]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: i_am_hesitating
+ reference: ''
+ 8c689423-880d-402b-8c7d-a1a98c7589e8: !Template
+ answer_choices: '{{choices.text | join("|||")}}'
+ id: 8c689423-880d-402b-8c7d-a1a98c7589e8
+ jinja: 'I gave my students this multiple choice question: {{question}}
+
+
+ Only one answer is correct among these 4 choices:
+
+ - {{answer_choices | join("\n- ")}}
+
+
+ Could you tell me which one is correct?|||
+
+ {{answer_choices[choices["label"].index(answerKey)]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: multiple_choice
+ reference: ''
+ c988ee30-a523-457b-af21-87353349b543: !Template
+ answer_choices: null
+ id: c988ee30-a523-457b-af21-87353349b543
+ jinja: 'Pick and copy all the incorrect options for the following question:
+
+
+ {{question}}
+
+
+ Options:
+
+ - {{choices["text"] | join("\n- ")}}|||
+
+ {% for i in range(choices["label"]|length) %}
+
+ {% if i != choices["label"].index(answerKey) %}
+
+ - {{choices["text"][i]}}
+
+ {% endif %}
+
+ {% endfor %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ - Other
+ original_task: false
+ name: pick_false_options
+ reference: ''
+ d90da519-0e2c-4f9b-a546-7cba82824eb2: !Template
+ answer_choices: A ||| B ||| C ||| D
+ id: d90da519-0e2c-4f9b-a546-7cba82824eb2
+ jinja: "Here's a problem to solve: {{question}}\n\nAmong the 4 following options,\
+ \ which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text)\
+ \ %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: heres_a_problem
+ reference: ''
promptsource/templates/amazon_polarity/templates.yaml ADDED
@@ -0,0 +1,174 @@
+ dataset: amazon_polarity
+ templates:
+ 1e90a24a-1182-43dd-9445-22f2e56e5761: !Template
+ answer_choices: Negative ||| Positive
+ id: 1e90a24a-1182-43dd-9445-22f2e56e5761
+ jinja: 'Title: {{title}}
+
+ Review: {{content}}
+
+ Is the review positive or negative? |||
+
+ {{answer_choices[label]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: Is_this_review
+ reference: ''
+ 3a48f287-6a4b-4df0-ab2d-2eaf6cb8e53d: !Template
+ answer_choices: No ||| Yes
+ id: 3a48f287-6a4b-4df0-ab2d-2eaf6cb8e53d
+ jinja: 'Based on this review, would the user recommend this product?
+
+ ===
+
+ Review: {{content}}
+
+ Answer: |||
+
+ {{answer_choices[label]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ original_task: true
+ name: User_recommend_this_product
+ reference: 'Reformulation equivalent to sent analysis: would the user recommend
+ this product?'
+ 592caf8f-f8ff-426a-a61b-b7e95ed510b6: !Template
+ answer_choices: No ||| Yes
+ id: 592caf8f-f8ff-426a-a61b-b7e95ed510b6
+ jinja: 'Is this product review positive?
+
+ Title: {{title}}
+
+ Review: {{content}}
+
+ Answer: |||
+
+ {{answer_choices[label]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ original_task: true
+ name: Is_this_product_review_positive
+ reference: ''
+ 745b9c05-10df-4a7e-81ad-1b88cefcb166: !Template
+ answer_choices: Yes ||| No
+ id: 745b9c05-10df-4a7e-81ad-1b88cefcb166
+ jinja: 'Title: {{title}}
+
+ Review: {{content}}
+
+ Is this product review negative?|||
+
+ {{answer_choices[label]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ original_task: true
+ name: Is_this_review_negative
+ reference: ''
+ 8abb5377-5dd3-4402-92a5-0d81adb6a325: !Template
+ answer_choices: Negative ||| Positive
+ id: 8abb5377-5dd3-4402-92a5-0d81adb6a325
+ jinja: 'Title: {{title}}
+
+ Review: {{content}}
+
+ Does this product review convey a negative or positive sentiment?|||
+
+ {{answer_choices[label]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: convey_negative_or_positive_sentiment
+ reference: ''
+ 9df70cdf-f8ed-4e79-8e2f-b4668058d637: !Template
+ answer_choices: Negative ||| Positive
+ id: 9df70cdf-f8ed-4e79-8e2f-b4668058d637
+ jinja: 'Is there a negative or positive tone to this product review?
+
+ ===
+
+ Title: {{title}}
+
+ Review: {{content}}
+
+ Answer: |||
+
+ {{answer_choices[label]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: negative_or_positive_tone
+ reference: ''
+ b13369e8-0500-4e93-90d4-8e6814bfb97b: !Template
+ answer_choices: dissatisfied ||| satisfied
+ id: b13369e8-0500-4e93-90d4-8e6814bfb97b
+ jinja: 'Here is a review left by a customer on a product. Would you say he was
+ {{answer_choices[1]}} or {{answer_choices[0]}}?
+
+ Title: {{title}}
+
+ Review: {{content}}
+
+ |||
+
+ {{answer_choices[label]}} '
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: user_satisfied
+ reference: ''
+ b13369e8-0500-4e93-90d4-8e6814bfb98b: !Template
+ answer_choices: decrease ||| increase
+ id: b13369e8-0500-4e93-90d4-8e6814bfb98b
+ jinja: 'You are considering whether to buy a product. You look at the reviews.
+ Would the following review {{answer_choices[0]}} or {{answer_choices[1]}} the
+ chances of you buying the product?
+
+ Review title: {{title}}
+
+ Product review: {{content}}
+
+ |||
+
+ {{answer_choices[label]}} '
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: would_you_buy
+ reference: ''
+ b13369e8-0500-4e93-90d4-8e6814bfb99b: !Template
+ answer_choices: unflattering ||| flattering
+ id: b13369e8-0500-4e93-90d4-8e6814bfb99b
+ jinja: 'Title: {{title}}
+
+ Product review: {{content}}
+
+ Would you say this review depicts the product in a {{answer_choices[1]}} or
+ {{answer_choices[0]}} light?
+
+ |||
+
+ {{answer_choices[label]}} '
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: flattering_or_not
+ reference: ''
promptsource/templates/amazon_reviews_multi/en/templates.yaml ADDED
@@ -0,0 +1,85 @@
+ dataset: amazon_reviews_multi
+ subset: en
+ templates:
+ 073dfd34-5aef-461a-81d9-bdb8e00f12c9: !Template
+ answer_choices: null
+ id: 073dfd34-5aef-461a-81d9-bdb8e00f12c9
+ jinja: 'Write a review title for the review below:
+
+ ===
+
+ {{review_body}} |||
+
+ {{review_title}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: Template_2
+ reference: Review Title based on Review body
+ 0f5b005b-c6bc-4fe0-bde4-0917cdba39e8: !Template
+ answer_choices: null
+ id: 0f5b005b-c6bc-4fe0-bde4-0917cdba39e8
+ jinja: 'Rate the product by the number of stars based on the review title below:
+ (1 being the lowest and 5 the highest)
+
+ ===
+
+ {{review_title}} |||
+
+ {{stars}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: Template_5
+ reference: Rating based on review title
+ 199ad6de-5bcc-421e-90e2-4b6edada6a01: !Template
+ answer_choices: null
+ id: 199ad6de-5bcc-421e-90e2-4b6edada6a01
+ jinja: 'Rate the product by the number of stars based on the review body below:
+ (1 being the lowest and 5 the highest)
+
+ ===
+
+ {{review_body}} |||
+
+ {{stars}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: Template_4
+ reference: Rating based on review body
+ 7ecaf718-c85d-47f4-83cb-f14c58f2911f: !Template
+ answer_choices: null
+ id: 7ecaf718-c85d-47f4-83cb-f14c58f2911f
+ jinja: 'Guess the product category for which the below review is:
+
+ ===
+
+ {{review_body}} |||
+
+ {{product_category}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: Template_1
+ reference: Product category based on review body
+ c4717e75-4d3e-4b79-9737-167155f51513: !Template
+ answer_choices: null
+ id: c4717e75-4d3e-4b79-9737-167155f51513
+ jinja: 'Guess the product category from the below review title:
+
+ ===
+
+ {{review_title}} |||
+
+ {{product_category}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: Template_3
+ reference: Product category from review title
promptsource/templates/amazon_us_reviews/Wireless_v1_00/templates.yaml ADDED
@@ -0,0 +1,69 @@
+ dataset: amazon_us_reviews
+ subset: Wireless_v1_00
+ templates:
+ 5feaa0d7-e4e0-46cc-8517-e00bfa7fd00e: !Template
+ answer_choices: null
+ id: 5feaa0d7-e4e0-46cc-8517-e00bfa7fd00e
+ jinja: 'Summarize a review headline for the review below: === {{review_body}}
+ ||| {{review_headline}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: Template_6
+ reference: Generate review headline based on review body
+ 957e3322-6907-4e67-bfbe-6ed8862f352c: !Template
+ answer_choices: null
+ id: 957e3322-6907-4e67-bfbe-6ed8862f352c
+ jinja: 'Guess the product category for which the below review is: === {{review_body}}
+ ||| {{product_category}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: Template_2
+ reference: Predict the product category based on review
+ 9588a967-d698-4a33-9b96-a5254df9d260: !Template
+ answer_choices: null
+ id: 9588a967-d698-4a33-9b96-a5254df9d260
+ jinja: Generate a {{star_rating}}-star review (1 being lowest and 5 being highest)
+ about this product in {{product_category}} category. ||| {{review_body}}
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: Template_1
+ reference: Generate review based on rating and category
+ 9a8b953d-2c68-4046-a7b7-8fd5f7469d10: !Template
+ answer_choices: null
+ id: 9a8b953d-2c68-4046-a7b7-8fd5f7469d10
+ jinja: 'How would you rate this review from 1 to 5 (1 being lowest and 5 being
+ highest): {{review_headline}}? ||| {{star_rating}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: Template_5
+ reference: 'Given the review headline, return a categorical rating.'
+ e40e4a53-ca5d-4fc8-a7c3-be9adfe0dbec: !Template
+ answer_choices: null
+ id: e40e4a53-ca5d-4fc8-a7c3-be9adfe0dbec
+ jinja: Generate a {{star_rating}}-star review headline (1 being lowest and 5 being
+ highest) about this product. ||| {{review_headline}}
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: Template_3
+ reference: 'Generate review headline based on rating.'
+ e6a1bbde-715d-4dad-9178-e2bcfaf5c646: !Template
+ answer_choices: null
+ id: e6a1bbde-715d-4dad-9178-e2bcfaf5c646
+ jinja: 'How would you rate this review from 1 to 5 (1 being lowest and 5 being
+ highest): {{review_body}}? ||| {{star_rating}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: Template_4
+ reference: 'Given the review body, return a categorical rating.'
promptsource/templates/ambig_qa/light/templates.yaml ADDED
@@ -0,0 +1,94 @@
+ dataset: ambig_qa
+ subset: light
+ templates:
+ 5f79fa25-3804-4e32-9493-a12c1c2ddff0: !Template
+ answer_choices: null
+ id: 5f79fa25-3804-4e32-9493-a12c1c2ddff0
+ jinja: "{# Assignment in an if clause breaks the test; we need to declare variables\
+ \ in the global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
+ \ set selected_question = \"\" %}\n{% set selected_answer = \"\" %}\n{% set\
+ \ random_question_id = -1 %}\n{% if annotations.type[0] == \"multipleQAs\" %}\n\
+ \ {% set random_question_id = range(0, annotations.qaPairs[0].question | length)\
+ \ | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]\
+ \ %}\n {% set selected_answer = annotations.qaPairs[0].answer[random_question_id]\
+ \ | choice %}\n{% else %}\n {% set selected_question = question %}\n {%\
+ \ set selected_answer = annotations.answer | choice %}\n{% endif %}\n\n{{selected_question}}\n\
+ |||\n{{selected_answer}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: ambig_qa_light3
+ reference: Randomly choose an annotated question and answer it using one of its
+ answers.
+ 72bf511b-44ce-4b9f-a2d0-5ed6334f0e07: !Template
+ answer_choices: null
+ id: 72bf511b-44ce-4b9f-a2d0-5ed6334f0e07
+ jinja: "{# Assignment in an if clause breaks the test; we need to declare variables\
+ \ in the global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
+ \ set random_question_id = -1 %}\n{% set random_answer_id = -1 %}\n{% set selected_question\
+ \ = \"\" %}\n{% set selected_answer = \"\" %}\n{% if annotations.type[0] ==\
+ \ \"multipleQAs\" %}\n {% set random_question_id = range(0, annotations.qaPairs[0].question\
+ \ | length) | choice%}\n {% set random_answer_id = range(0, annotations.qaPairs[0].answer\
+ \ | length) | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]\
+ \ %}\n {% set selected_answer = annotations.qaPairs[0].answer[random_answer_id]\
+ \ | choice%}\n{% else %}\n {% set random_question_id = 0 %}\n {% set random_answer_id\
+ \ = 0 %}\n {% set selected_question = question %}\n {% set selected_answer\
+ \ = annotations.answer[0] | choice %}\n{% endif %}\n\nIs \"{{selected_answer}}\"\
+ \ the answer to \"{{selected_question}}\"?\n\n|||\n\n{% if random_answer_id\
+ \ == random_question_id %} Yes {% else %} No {% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: ambig_qa_light4
+ reference: Classify whether the given answer is correct for the chosen question.
+ 7655d2aa-70df-42cf-9bfa-80484521f856: !Template
+ answer_choices: null
+ id: 7655d2aa-70df-42cf-9bfa-80484521f856
+ jinja: "{{question}}\n\n|||\n\n{# Assignment in an if clause breaks the test;\
+ \ we need to declare variables in the global scope first: https://github.com/pallets/jinja/issues/1314\
+ \ #}\n{% set random_answer = \"\" %}\n{% set random_answer_form = \"\" %}\n\
+ {% if annotations.type[0] == \"singleAnswer\" %}\n {% set random_answer_form\
+ \ = annotations.answer[0] | choice %}\n{% else %}\n {% set random_answer\
+ \ = annotations.qaPairs[0].answer | choice %}\n {% set random_answer_form\
+ \ = random_answer | choice %}\n{% endif %}\n\n{{random_answer_form}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: ambig_qa_light1
+ reference: Given the question, we choose the answer in single QA and randomly
+ choose when in multipleQA.
+ bb089312-23cb-475d-93b5-952781bc6be4: !Template
+ answer_choices: null
+ id: bb089312-23cb-475d-93b5-952781bc6be4
+ jinja: "{# Assignment in an if clause breaks the test; we need to declare variables\
+ \ in the global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
+ \ set selected_question = \"\" %}\n{% set selected_answer = \"\" %}\n{% set\
+ \ random_question_id = -1 %}\n{% if annotations.type[0] == \"multipleQAs\" %}\n\
+ \ {% set random_question_id = range(0, annotations.qaPairs[0].question | length)\
+ \ | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]%}\n\
+ \ {% set selected_answer = annotations.qaPairs[0].answer[random_question_id]\
+ \ | choice%}\n{% else %}\n {% set selected_question = question %}\n {% set\
+ \ selected_answer = annotations.answer | choice %}\n{% endif %}\nKnowing that\
+ \ \"{{selected_answer}}\" is the answer, what could have been the question?\n\
+ |||\n{{selected_question}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: ambig_qa_light5
+ reference: Generate the question from the answer.
+ f53d00ea-98a8-45d3-92f6-93a8909aef2a: !Template
+ answer_choices: null
+ id: f53d00ea-98a8-45d3-92f6-93a8909aef2a
+ jinja: "{{question}}\n\n|||\n\n{% if annotations.type[0] == \"singleAnswer\" %}\n\
+ \ {{annotations.answer[0] | choice}}\n{% else %}\n The question was ambiguous.\
+ \ Did you mean \"{{annotations.qaPairs[0].question |choice}}\"?\n{% endif %}\n"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: ambig_qa_light2
+ reference: If a question is ambiguous, ask a clarifying question; otherwise, answer
+ it.
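These templates pre-declare selected_question and selected_answer before the if blocks because, as the jinja comments note, assignments made inside a block do not reliably escape it (pallets/jinja#1314). The stock Jinja2 idiom for mutable cross-scope state is namespace(); a minimal sketch of the same pattern, with made-up variable names:

    from jinja2 import Environment

    # namespace() is a built-in Jinja2 global: attributes set on it inside a
    # block remain visible after the block closes.
    t = Environment().from_string(
        "{% set ns = namespace(answer='') %}"
        "{% if mode == 'single' %}{% set ns.answer = single %}"
        "{% else %}{% set ns.answer = multi %}{% endif %}"
        "{{ns.answer}}"
    )
    print(t.render(mode="single", single="A", multi="B"))  # -> A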
promptsource/templates/anli/templates.yaml ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset: anli
2
+ templates:
3
+ 0cc3ae39-3997-4686-8c93-5d51457efa1f: !Template
4
+ answer_choices: Correct ||| Inconclusive ||| Incorrect
5
+ id: 0cc3ae39-3997-4686-8c93-5d51457efa1f
6
+ jinja: '{{premise}} Using only the above description and what you know about the
7
+ world, "{{hypothesis}}" is definitely correct, incorrect, or inconclusive? |||
8
+ {{ answer_choices[label] }}'
9
+ metadata: !TemplateMetadata
10
+ choices_in_prompt: true
11
+ metrics:
12
+ - Accuracy
13
+ original_task: true
14
+ name: MNLI crowdsource
15
+ reference: Adapted from Williams et al. 2018's instructions to crowdsourcing workers.
16
+ 179eb863-3ece-4e6f-af0f-fcb46d997306: !Template
17
+ answer_choices: Yes ||| Maybe ||| No
18
+ id: 179eb863-3ece-4e6f-af0f-fcb46d997306
19
+ jinja: 'Given {{premise}} Should we assume that "{{hypothesis}}" is true? Yes,
20
+ no, or maybe? ||| {{ answer_choices[label] }} '
21
+ metadata: !TemplateMetadata
22
+ choices_in_prompt: true
23
+ metrics:
24
+ - Accuracy
25
+ original_task: true
26
+ name: should assume
27
+ reference: Webson & Pavlick 2021
28
+ 5459237b-97de-4340-bf7b-2939c3f7ca19: !Template
29
+ answer_choices: Yes ||| Maybe ||| No
30
+ id: 5459237b-97de-4340-bf7b-2939c3f7ca19
31
+ jinja: Given that {{premise}} Does it follow that {{hypothesis}} Yes, no, or maybe?
32
+ ||| {{ answer_choices[label] }}
33
+ metadata: !TemplateMetadata
34
+ choices_in_prompt: true
35
+ metrics:
36
+ - Accuracy
37
+ original_task: true
38
+ name: does it follow that
39
+ reference: v0.1
40
+ 620aa3fc-d5eb-46f5-a1ee-4c754527aa97: !Template
41
+ answer_choices: True ||| Neither ||| False
42
+ id: 620aa3fc-d5eb-46f5-a1ee-4c754527aa97
43
+ jinja: '{{premise}}
44
+
45
+ Question: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label]
46
+ }}'
47
+ metadata: !TemplateMetadata
48
+ choices_in_prompt: true
49
+ metrics:
50
+ - Accuracy
51
+ original_task: true
52
+ name: GPT-3 style
53
+ reference: 'Same as reported in Figure G7 of the GPT-3 paper, except that there
54
+ is no task identifying tokens like "anli R1: ".'
55
+ 9b613182-c6ab-4427-9221-3d68f6d62765: !Template
56
+ answer_choices: Yes ||| Maybe ||| No
57
+ id: 9b613182-c6ab-4427-9221-3d68f6d62765
58
+ jinja: '{{premise}} Based on the previous passage, is it true that "{{hypothesis}}"?
59
+ Yes, no, or maybe? ||| {{ answer_choices[label] }}'
60
+ metadata: !TemplateMetadata
61
+ choices_in_prompt: true
62
+ metrics:
63
+ - Accuracy
64
+ original_task: true
65
+ name: based on the previous passage
66
+ reference: "Adapted from the BoolQ prompts in Schick & Sch\xFCtze 2021."
67
+ a850110d-f1a3-49b4-949a-d3bfe9f81344: !Template
68
+ answer_choices: Yes ||| Maybe ||| No
69
+ id: a850110d-f1a3-49b4-949a-d3bfe9f81344
70
+ jinja: '{{premise}} Are we justified in saying that "{{hypothesis}}"? Yes, no,
71
+ or maybe? ||| {{ answer_choices[label] }} '
72
+ metadata: !TemplateMetadata
73
+ choices_in_prompt: true
74
+ metrics:
75
+ - Accuracy
76
+ original_task: true
77
+ name: justified in saying
78
+ reference: Webson & Pavlick 2021
79
+ bab86d5a-4f9c-40db-b619-a7b7d5cae681: !Template
80
+ answer_choices: True ||| Inconclusive ||| False
81
+ id: bab86d5a-4f9c-40db-b619-a7b7d5cae681
82
+ jinja: 'Take the following as truth: {{premise}}
83
+
84
+ Then the following statement: "{{hypothesis}}" is {{"true"}}, {{"false"}}, or
85
+ {{"inconclusive"}}? ||| {{ answer_choices[label] }}'
86
+ metadata: !TemplateMetadata
87
+ choices_in_prompt: true
88
+ metrics:
89
+ - Accuracy
90
+ original_task: true
91
+ name: take the following as truth
92
+ reference: Bers et al.
93
+ bcd90047-3a2b-426b-b065-8a418f1317b8: !Template
+ answer_choices: Yes ||| Maybe ||| No
+ id: bcd90047-3a2b-426b-b065-8a418f1317b8
+ jinja: 'Given that {{premise}} Therefore, it must be true that "{{hypothesis}}"?
+ Yes, no, or maybe? ||| {{ answer_choices[label] }} '
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: must be true
+ reference: v0.1
+ c4ed37ae-d7d7-4197-a725-ef2152fa3b1f: !Template
+ answer_choices: Yes ||| Maybe ||| No
+ id: c4ed37ae-d7d7-4197-a725-ef2152fa3b1f
+ jinja: 'Suppose {{premise}} Can we infer that "{{hypothesis}}"? Yes, no, or maybe?
+ ||| {{ answer_choices[label] }} '
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: can we infer
+ reference: Webson & Pavlick 2021
+ ca24b93a-6265-462f-b140-e329c03d94fa: !Template
+ answer_choices: Guaranteed ||| Possible ||| Impossible
+ id: ca24b93a-6265-462f-b140-e329c03d94fa
+ jinja: "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is\
+ \ {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label]\
+ \ }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: guaranteed/possible/impossible
+ reference: Bers et al.
+ dbc68425-5c42-43ae-9748-70ce8c5a167e: !Template
+ answer_choices: Always ||| Sometimes ||| Never
+ id: dbc68425-5c42-43ae-9748-70ce8c5a167e
+ jinja: Suppose it's true that {{premise}} Then, is "{{hypothesis}}" {{"always"}},
+ {{"sometimes"}}, or {{"never"}} true? ||| {{ answer_choices[label] }}
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: always/sometimes/never
+ reference: Bers et al.
+ e5b7fdd7-fdff-4630-889b-3c7a052e5da0: !Template
+ answer_choices: Yes ||| Maybe ||| No
+ id: e5b7fdd7-fdff-4630-889b-3c7a052e5da0
+ jinja: "{{premise}} \n\nQuestion: Does this imply that \"{{hypothesis}}\"? Yes,\
+ \ no, or maybe? ||| {{answer_choices[label]}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: does this imply
+ reference: v0.1
+ e6f32b9c-7e0b-474a-a0d2-e84d20c22aba: !Template
+ answer_choices: Always ||| Sometimes ||| Never
+ id: e6f32b9c-7e0b-474a-a0d2-e84d20c22aba
+ jinja: "{{premise}} \n\nKeeping in mind the above text, consider: {{hypothesis}}\
+ \ Is this {{\"always\"}}, {{\"sometimes\"}}, or {{\"never\"}} correct? ||| {{\
+ \ answer_choices[label] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: consider always/sometimes/never
+ reference: Bers et al.
+ ec249357-e672-4e7d-b8b6-d97ed7d090c5: !Template
+ answer_choices: True ||| Inconclusive ||| False
+ id: ec249357-e672-4e7d-b8b6-d97ed7d090c5
+ jinja: '{{premise}} Based on that information, is the claim: "{{hypothesis}}"
+ {{"true"}}, {{"false"}}, or {{"inconclusive"}}? ||| {{ answer_choices[label]
+ }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: claim true/false/inconclusive
+ reference: Bers et al.
+ ffa0a6f0-7186-4ccb-bb35-8b1affb747a0: !Template
+ answer_choices: Yes ||| Maybe ||| No
+ id: ffa0a6f0-7186-4ccb-bb35-8b1affb747a0
+ jinja: 'Given {{premise}} Is it guaranteed true that "{{hypothesis}}"? Yes, no,
+ or maybe? ||| {{ answer_choices[label] }} '
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: guaranteed true
+ reference: Webson & Pavlick 2021
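
Every entry above follows the same promptsource convention: the `jinja` field is a single template whose `|||` separates the rendered prompt from the rendered target, and `answer_choices` is a `|||`-delimited string indexed by the example's integer `label`. A minimal rendering sketch with jinja2 (the example record and the split-on-`|||` handling are illustrative assumptions, not data from this commit):

```python
from jinja2 import Template

jinja_src = ('Suppose {{premise}} Can we infer that "{{hypothesis}}"? '
             'Yes, no, or maybe? ||| {{ answer_choices[label] }}')
# assumption: answer_choices strings are parsed by splitting on "|||"
answer_choices = [c.strip() for c in "Yes ||| Maybe ||| No".split("|||")]

example = {"premise": "All birds can fly.", "hypothesis": "Penguins can fly.", "label": 2}  # toy record
rendered = Template(jinja_src).render(answer_choices=answer_choices, **example)
prompt, target = (part.strip() for part in rendered.split("|||"))
print(prompt)  # Suppose All birds can fly. Can we infer that "Penguins can fly."? Yes, no, or maybe?
print(target)  # No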
promptsource/templates/app_reviews/templates.yaml ADDED
@@ -0,0 +1,68 @@
+ dataset: app_reviews
+ templates:
+ 2da8f134-58db-4f9d-b3b0-8c6b50693ab5: !Template
+ answer_choices: Not at all ||| No ||| Maybe ||| Yes ||| Definitely
+ id: 2da8f134-58db-4f9d-b3b0-8c6b50693ab5
+ jinja: 'Given this review: "{{review}}"
+
+ Would you recommend this app to a friend? {{answer_choices[0]}}, {{answer_choices[1]}},
+ {{answer_choices[2]}}, {{answer_choices[3]}}, or {{answer_choices[4]}}?
+
+ |||
+
+ {{answer_choices[star-1]}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ - Spearman Correlation
+ original_task: false
+ name: categorize_rating_using_review
+ reference: Given the review, return a categorical answer.
+ 8086b434-a75e-45a4-87fb-4364601e2e05: !Template
+ answer_choices: null
+ id: 8086b434-a75e-45a4-87fb-4364601e2e05
+ jinja: 'Generate a {{star}}-star review (1 being lowest and 5 being highest) about
+ an app with package {{package_name}}.
+
+ |||
+
+ {{review}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: generate_review
+ reference: Generate a review from the rating.
+ 9746ce4b-ac58-4dfb-9783-d77c95cb62cf: !Template
+ answer_choices: "\u2605 ||| \u2605\u2605 ||| \u2605\u2605\u2605 ||| \u2605\u2605\
+ \u2605\u2605 ||| \u2605\u2605\u2605\u2605\u2605"
+ id: 9746ce4b-ac58-4dfb-9783-d77c95cb62cf
+ jinja: "What would be the \u2605-rating of this review (\u2605 being the lowest\
+ \ and \u2605\u2605\u2605\u2605\u2605 being the highest)? \"{{review}}\"\n|||\n\
+ {{answer_choices[star-1]}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ - Spearman Correlation
+ original_task: false
+ name: convert_to_star_rating
+ reference: Given the review, generate a star rating.
+ d34e1413-2699-4701-baa2-05d931d012ba: !Template
+ answer_choices: null
+ id: d34e1413-2699-4701-baa2-05d931d012ba
+ jinja: 'On a scale of 1-5 (with 1 being least favorable and 5 being most favorable),
+ how would you rate this review? "{{review}}"
+
+ |||
+
+ {{star}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ - Spearman Correlation
+ original_task: false
+ name: convert_to_rating
+ reference: Convert review to rating
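
`categorize_rating_using_review` and `convert_to_star_rating` both rely on the dataset's 1-indexed `star` field to pick a 0-indexed answer choice. A quick sketch of that off-by-one mapping (toy values):

```python
from jinja2 import Template

choices = ["Not at all", "No", "Maybe", "Yes", "Definitely"]
t = Template("{{ answer_choices[star-1] }}")
for star in range(1, 6):  # app_reviews stars run 1..5
    print(star, "->", t.render(answer_choices=choices, star=star))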
promptsource/templates/aqua_rat/raw/templates.yaml ADDED
@@ -0,0 +1,125 @@
+ dataset: aqua_rat
+ subset: raw
+ templates:
+ 13bd5099-33fa-4383-a441-33a7d2e1746f: !Template
+ answer_choices: null
+ id: 13bd5099-33fa-4383-a441-33a7d2e1746f
+ jinja: 'Given the problem:
+
+ {{question}}
+
+
+ and the options:
+
+ {% for i in range(options|length) %}
+
+ {{options[i].replace('')'', '') '')}}
+
+ {% endfor %}
+
+
+ The correct answer is |||
+
+ {{correct}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: temp_6
+ reference: ''
+ 58a6aa2b-ca26-473d-9bf8-385dd1a743cd: !Template
+ answer_choices: null
+ id: 58a6aa2b-ca26-473d-9bf8-385dd1a743cd
+ jinja: 'You will now be given a question and a set of options. Choose the correct
+ option and provide a rationale for the same.
+
+
+ Question:
+
+ {{question}}
+
+
+ Options:
+
+ {% for i in range(options|length) %}
+
+ {{options[i].replace('')'', '') '')}}
+
+ {% endfor %}
+
+
+ |||
+
+ {{correct}}
+
+
+ {{rationale}}
+
+ '
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: temp_4
+ reference: ''
+ 5acfaa48-e1b6-44df-8e92-c58b94bff595: !Template
+ answer_choices: null
+ id: 5acfaa48-e1b6-44df-8e92-c58b94bff595
+ jinja: "Answer the given question by providing the correct rationale:\n\n{{question}}\n\
+ {% for i in range(options|length) %}\n {{options[i].replace(')', ') ')}}\n\
+ {%endfor%}\n|||\n{{rationale}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: temp_2
+ reference: ''
+ 815acaf5-2e59-4f81-8190-ae75dc237cf1: !Template
+ answer_choices: null
+ id: 815acaf5-2e59-4f81-8190-ae75dc237cf1
+ jinja: '{{question}}
+
+
+ The above question was asked in a Math test. Given the following options, can
+ you choose the correct one?
+
+
+ {% for i in range(options|length) %}
+
+ {{options[i].replace('')'', '') '')}}
+
+ {% endfor %}
+
+ |||
+
+ {{correct}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: temp_3
+ reference: ''
+ c0403841-68b0-4c08-8c3b-a00a81272d05: !Template
+ answer_choices: null
+ id: c0403841-68b0-4c08-8c3b-a00a81272d05
+ jinja: "Solve the following question and choose the correct option.\n\n{{question}}\
+ \ \n{% for i in range(options|length) %}\n{{options[i].replace(')', ') ')}}\n\
+ {%endfor%}\n||| \n{{correct}}\n\n"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: basic
+ reference: ''
+ c9352c6c-074b-4beb-8489-c151adeeedcb: !Template
+ answer_choices: null
+ id: c9352c6c-074b-4beb-8489-c151adeeedcb
+ jinja: "Question: \n{{question}}\n\nOptions: \n{% for i in range(options|length)\
+ \ %}\n{{options[i].replace(')', ') ')}}\n{% endfor %}\n\nThis is how I solved\
+ \ the above question:\n|||\n{{rationale}}\n"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: temp_5
+ reference: ''
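
Note that inside the single-quoted YAML scalars above, `''` is YAML's escape for a literal `'`, so `replace('')'', '') '')` decodes to the same `replace(')', ') ')` spelled out in the double-quoted templates: it pads each option marker like `A)21` into `A) 21`. A sketch of the loop with toy options:

```python
from jinja2 import Template

src = "{% for i in range(options|length) %}{{ options[i].replace(')', ') ') }}\n{% endfor %}"
print(Template(src).render(options=["A)21", "B)22", "C)23"]))
# A) 21
# B) 22
# C) 23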
promptsource/templates/art/templates.yaml ADDED
@@ -0,0 +1,218 @@
+ dataset: art
+ templates:
+ 151d0e97-d7d2-47f2-86b4-6777587b16f2: !Template
+ answer_choices: null
+ id: 151d0e97-d7d2-47f2-86b4-6777587b16f2
+ jinja: "We know that:\n\n{{ observation_1 | trim('.?!') }},\n\nand:\n\n{{ observation_2\
+ \ }} \n\nWhat is more likely?\n\nFirst option: \n\n{{ hypothesis_1 | trim('.?!')\
+ \ }}, \n\nor second option:\n\n{{ hypothesis_2 | trim('.?!') }}?\n|||\n{{ [hypothesis_1,\
+ \ hypothesis_2][label-1]}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: hyp4
+ reference: ''
+ 2c74c78c-1757-4236-8925-594bbff9a621: !Template
+ answer_choices: null
+ id: 2c74c78c-1757-4236-8925-594bbff9a621
+ jinja: 'Which version is more accurate?
+
+
+ The first one:
+
+
+ {{ hypothesis_2 | trim(''.?!'') }},
+
+
+ or the second one:
+
+
+ {{ hypothesis_1 | trim(''.?!'') }}?
+
+
+ Assuming that:
+
+
+ {{ observation_1 }} {{ observation_2 }}
+
+ |||
+
+ {{ [hypothesis_1, hypothesis_2][label-1] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: hyp5_reversed
+ reference: ''
+ 2e360dde-c137-405c-bd8b-9e31c9f2aa8c: !Template
+ answer_choices: No ||| Yes
+ id: 2e360dde-c137-405c-bd8b-9e31c9f2aa8c
+ jinja: "Given that: \n\n{{ observation_1 | trim('.?!') }}, \n\nand: \n\n{{\
+ \ observation_2 | trim('.?!') }}, \n\nis it true that:\n\n{{ hypothesis_2\
+ \ | trim('.?!')}}?\n|||\n{{ answer_choices[label-1] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: hyp2_1
+ reference: ''
+ 43fd9dac-ce01-4d9c-9a03-ae38d98bb5aa: !Template
+ answer_choices: No ||| Yes
+ id: 43fd9dac-ce01-4d9c-9a03-ae38d98bb5aa
+ jinja: "Does this statement: \n\n{{ hypothesis_2 | trim('.?!') }} \n\nexplain\
+ \ the situation described below?\n\n{{ observation_1 }}\n{{ observation_2 }}\n\
+ |||\n{{ answer_choices[label-1] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: hyp2_2
+ reference: ''
+ 5015a37a-c66b-4b44-9e92-08a403a7b6aa: !Template
+ answer_choices: null
+ id: 5015a37a-c66b-4b44-9e92-08a403a7b6aa
+ jinja: '{{ observation_1 }} {{ observation_2 }}
+
+
+ Would you rather believe that:
+
+
+ {{ hypothesis_2 | trim(''.?!'') }},
+
+
+ or:
+
+
+ {{ hypothesis_1 | trim(''.?!'') }}?
+
+ |||
+
+ {{ [hypothesis_1, hypothesis_2][label-1] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: hyp3_reversed
+ reference: ''
+ 6dda5a3f-3511-4f9b-9062-a33fe98c477d: !Template
+ answer_choices: Yes ||| No
+ id: 6dda5a3f-3511-4f9b-9062-a33fe98c477d
+ jinja: "Given that: \n\n{{ observation_1 | trim('.?!') }}, \n\nand: \n\n{{ \
+ \ observation_2 | trim('.?!') }}, \n\nis it true that:\n\n{{ hypothesis_1 |\
+ \ trim('.?!') }}?\n|||\n{{ answer_choices[label-1] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: hyp1_1
+ reference: ''
+ bf8a5b8a-70cb-4b27-82db-8ca4fbd2318d: !Template
+ answer_choices: null
+ id: bf8a5b8a-70cb-4b27-82db-8ca4fbd2318d
+ jinja: '{{ observation_1 }} {{ observation_2 }}
+
+
+ Would you rather believe that:
+
+
+ {{ hypothesis_1 | trim(''.?!'') }},
+
+
+ or:
+
+
+ {{ hypothesis_2 | trim(''.?!'') }}?
+
+ |||
+
+ {{ [hypothesis_1, hypothesis_2][label-1] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: hyp3
+ reference: ''
+ c0fc2e80-063f-4f8a-ad5d-c7603ed74883: !Template
+ answer_choices: null
+ id: c0fc2e80-063f-4f8a-ad5d-c7603ed74883
+ jinja: "Which of the following better fits the description?\n\nIs it that: \n\n\
+ {{ hypothesis_2 | trim('.?!') }},\n\nor rather: \n\n{{ hypothesis_1 | trim('.?!')\
+ \ }}?\n\nDescription: \n\n{{ observation_1 }} {{ observation_2 }}\n|||\n{{ [hypothesis_1,\
+ \ hypothesis_2][label-1] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: hyp6_reversed
+ reference: ''
+ d418b574-9d0a-4d29-a518-7d9a5f5a4a3d: !Template
+ answer_choices: null
+ id: d418b574-9d0a-4d29-a518-7d9a5f5a4a3d
+ jinja: "Which of the following better fits the description?\n\nIs it that: \n\n\
+ {{ hypothesis_1 | trim('.?!') }},\n\nor rather: \n\n{{ hypothesis_2 | trim('.?!')\
+ \ }}?\n\nDescription: \n\n{{ observation_1 }} {{ observation_2 }}\n|||\n{{ [hypothesis_1,\
+ \ hypothesis_2][label-1] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: hyp6
+ reference: ''
+ e4442077-bc1b-40eb-831f-a19971f810d7: !Template
+ answer_choices: Yes ||| No
+ id: e4442077-bc1b-40eb-831f-a19971f810d7
+ jinja: "Does this statement: \n\n{{ hypothesis_1 | trim('.?!') }} \n\nexplain\
+ \ the situation described below? \n\n{{ observation_1 }}\n{{ observation_2 }}\n\
+ |||\n{{ answer_choices[label-1] }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: hyp1_2
+ reference: ''
+ e90f1ef2-e6cd-4bfa-a697-a6d9e1077cee: !Template
+ answer_choices: null
+ id: e90f1ef2-e6cd-4bfa-a697-a6d9e1077cee
+ jinja: "We know that:\n\n{{ observation_1 | trim('.?!') }},\n\nand:\n\n{{ observation_2\
+ \ }} \n\nWhat is more likely?\n\nFirst option: \n\n{{ hypothesis_2 | trim('.?!')\
+ \ }}, \n\nor second option:\n\n{{ hypothesis_1 | trim('.?!') }}?\n|||\n{{ [hypothesis_1,\
+ \ hypothesis_2][label-1]}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: hyp4_reversed
+ reference: ''
+ eb0baa43-3c79-4d1d-973a-37e0055bbfec: !Template
+ answer_choices: null
+ id: eb0baa43-3c79-4d1d-973a-37e0055bbfec
+ jinja: 'Which version is more accurate?
+
+
+ The first one:
+
+
+ {{ hypothesis_1 | trim(''.?!'') }},
+
+
+ or the second one:
+
+
+ {{ hypothesis_2 | trim(''.?!'') }}?
+
+
+ Assuming that:
+
+
+ {{ observation_1 }} {{ observation_2 }}
+
+ |||
+
+ {{ [hypothesis_1, hypothesis_2][label-1] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: hyp5
+ reference: ''
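
All of the two-way art templates select the gold hypothesis with a Python-style list literal and a 1-indexed `label`, i.e. `[hypothesis_1, hypothesis_2][label-1]`. Toy sketch of that selection:

```python
from jinja2 import Template

t = Template("{{ [hypothesis_1, hypothesis_2][label-1] }}")
print(t.render(hypothesis_1="first", hypothesis_2="second", label=2))  # second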
promptsource/templates/asnq/templates.yaml ADDED
@@ -0,0 +1,118 @@
+ dataset: asnq
+ templates:
+ 55f386ba-9a86-405e-a805-152e254a4205: !Template
+ answer_choices: null
+ id: 55f386ba-9a86-405e-a805-152e254a4205
+ jinja: "{% if label == 1 %}\n\nWhat is a question that someone might ask that\
+ \ the following sentence can answer?\n\n {{sentence}}\n\n|||\n\n{{question}}\n\
+ {% endif %}\n"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: Sentence question generation 2
+ reference: ''
+ 5b6abb0a-1b4f-4338-aab6-430465669164: !Template
+ answer_choices: null
+ id: 5b6abb0a-1b4f-4338-aab6-430465669164
+ jinja: '{% if label == 1 %}
+
+
+ Write a question based on this sentence: {{sentence}}
+
+
+ |||
+
+
+ {{question}}
+
+ {% endif %}
+
+ '
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: sentence question generation
+ reference: ''
+ 859ec580-957b-42da-be1b-c3ccb8b52d24: !Template
+ answer_choices: null
+ id: 859ec580-957b-42da-be1b-c3ccb8b52d24
+ jinja: '{% if label == 1 %}
+
+
+ Generate a one-sentence answer to the following question: {{question}}?
+
+
+ |||
+
+
+ {{sentence}}
+
+ {% endif %}
+
+ '
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: answer question with a sentence 3
+ reference: ''
+ 85da6666-9e50-4122-84c8-d00b90967475: !Template
+ answer_choices: null
+ id: 85da6666-9e50-4122-84c8-d00b90967475
+ jinja: '{% if label == 1 %}
+
+
+ I was wondering, {{question}}? Can you give me a full sentence answer?
+
+
+ |||
+
+
+ {{sentence}}
+
+ {% endif %}
+
+ '
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: answer question with a sentence 2
+ reference: ''
+ 85fe8aaa-83c5-41ec-ada5-0e6d60bab1f9: !Template
+ answer_choices: null
+ id: 85fe8aaa-83c5-41ec-ada5-0e6d60bab1f9
+ jinja: '{% if label == 1 %}
+
+
+ Answer this question as a full sentence: {{question}}?
+
+
+ |||
+
+
+ {{sentence}}
+
+ {% endif %}
+
+ '
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: answer question as a sentence
+ reference: ''
+ a36d6152-72c4-4278-8266-d27b28667f61: !Template
+ answer_choices: null
+ id: a36d6152-72c4-4278-8266-d27b28667f61
+ jinja: "{% if label == 1 %}\n\nHere is a sentence:\n\n {{sentence}}\n\nWrite a\
+ \ question that this sentence is an answer to.\n\n|||\n\n{{question}}\n{% endif\
+ \ %}\n"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: Sentence question generation 3
+ reference: ''
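
Each asnq template is wrapped in `{% if label == 1 %}`, so negative question/sentence pairs render to an empty string (which the consuming code presumably filters out). Sketch with toy values:

```python
from jinja2 import Template

src = ("{% if label == 1 %}Write a question based on this sentence: "
       "{{sentence}} ||| {{question}}{% endif %}")
t = Template(src)
print(repr(t.render(label=0, sentence="s", question="q")))  # '' -> example dropped
print(t.render(label=1, sentence="The sky is blue.", question="what color is the sky"))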
promptsource/templates/asset/ratings/templates.yaml ADDED
@@ -0,0 +1,56 @@
+ dataset: asset
+ subset: ratings
+ templates:
+ 09b2a13b-cba6-4473-8a46-3fa24be71ce2: !Template
+ answer_choices: null
+ id: 09b2a13b-cba6-4473-8a46-3fa24be71ce2
+ jinja: "{% set questions= [ \"Does the second sentence better convey the information?\"
+ , \"Is the second sentence more fluent?\", \"Is the second sentence easier\
+ \ to understand?\"] %}\n\nFirst sentence: {{original}}\n\nSecond sentence: {{simplification}}\n\
+ \n{{questions[aspect]}} \n\n|||\n\n{% if rating > 50 %}\n Yes\n{% else %}\n\
+ \ No\n{% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: asset_ratings1
+ reference: Taking questions from the original paper, we use rating to establish
+ a binary classification problem.
+ 47142040-4121-4144-98b9-61cb5cbb1313: !Template
+ answer_choices: null
+ id: 47142040-4121-4144-98b9-61cb5cbb1313
+ jinja: 'First sentence: {{original}}
+
+
+ Second sentence: {{simplification}}
+
+
+ I am scoring these simplification exercises. How much easier to read is the
+ second sentence on a scale from 0 (harder to read) to 100 (easier to read)?
+
+
+ |||
+
+
+ {{rating}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: asset_ratings3
+ reference: Prompt model to rate how simplified the sentence is in the general
+ sense, instead of a particular aspect.
+ d2bed959-29ab-4962-a106-dc91c00f3f03: !Template
+ answer_choices: null
+ id: d2bed959-29ab-4962-a106-dc91c00f3f03
+ jinja: "{% set statements= [ \"the second sentence expresses the underlying meaning\
+ \ the best.\", \"the second sentence is more fluent.\", \"the second sentence\
+ \ is easier to read and understand.\"] %}\n\nFirst sentence: {{original}}\n\n\
+ Second sentence: {{simplification}}\n\nRate the following statement from 0 (strongly\
+ \ disagree) to 100 (strongly agree): {{statements[aspect]}} \n\n|||\n\n{{rating}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: asset_ratings2
+ reference: Require the model to output the rating
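
`asset_ratings1` turns the continuous 0-100 `rating` into a binary Yes/No at a fixed threshold of 50; the other two templates ask for the raw rating. The equivalent Python check:

```python
def yes_no(rating: int) -> str:
    # mirrors the template's {% if rating > 50 %} branch
    return "Yes" if rating > 50 else "No"

assert yes_no(80) == "Yes" and yes_no(50) == "No"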
promptsource/templates/asset/simplification/templates.yaml ADDED
@@ -0,0 +1,41 @@
+ dataset: asset
+ subset: simplification
+ templates:
+ 0f0e55f9-28b4-4844-b65d-b9544a0918eb: !Template
+ answer_choices: null
+ id: 0f0e55f9-28b4-4844-b65d-b9544a0918eb
+ jinja: "{{original}}\n\nHow would I say this in another way? \n\n|||\n\n{{simplifications\
+ \ | choice}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: asset_simplification1
+ reference: Rewrite text using one random simplification
+ 3cbfbc1c-6876-4dd7-b7db-45fb3233a667: !Template
+ answer_choices: null
+ id: 3cbfbc1c-6876-4dd7-b7db-45fb3233a667
+ jinja: "{{simplifications | choice}}\n\nHow would I say this in another way? \n\
+ \n|||\n\n{{original}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: asset_simplification2
+ reference: Find the original text from the simplification
+ d528d74b-bbc2-4888-ae21-db0ab37304df: !Template
+ answer_choices: null
+ id: d528d74b-bbc2-4888-ae21-db0ab37304df
+ jinja: 'I''d like to explain to my child "{{original}}". How would I do so?
+
+
+ |||
+
+
+ {{simplifications | choice}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: asset_simplification3
+ reference: Implicit simplification request
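
`simplifications | choice` is not a built-in Jinja filter; promptsource registers its own to sample one of the reference simplifications. A stand-in built on `random.choice` (the filter registration below is an assumption that mirrors, rather than reproduces, promptsource's implementation):

```python
import random
from jinja2 import Environment

env = Environment()
env.filters["choice"] = random.choice  # assumption: mirrors promptsource's custom filter
t = env.from_string("{{ simplifications | choice }}")
print(t.render(simplifications=["A simpler sentence.", "An even simpler one."]))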
promptsource/templates/banking77/templates.yaml ADDED
@@ -0,0 +1,269 @@
+ dataset: banking77
+ templates:
+ 0dba8abc-248a-44db-bb86-20492ffc17f6: !Template
+ answer_choices: null
+ id: 0dba8abc-248a-44db-bb86-20492ffc17f6
+ jinja: "Which help page can be provided to give information regarding this\
+ \ query?\n\n{{text}} |||\n{{\n[\n \"activate_my_card\",\n \"age_limit\",\n\
+ \ \"apple_pay_or_google_pay\",\n \"atm_support\",\n \"automatic_top_up\"\
+ ,\n \"balance_not_updated_after_bank_transfer\",\n \"balance_not_updated_after_cheque_or_cash_deposit\"\
+ ,\n \"beneficiary_not_allowed\",\n \"cancel_transfer\",\n \"card_about_to_expire\"\
+ ,\n \"card_acceptance\",\n \"card_arrival\",\n \"card_delivery_estimate\"\
+ ,\n \"card_linking\",\n \"card_not_working\",\n \"card_payment_fee_charged\"\
+ ,\n \"card_payment_not_recognised\",\n \"card_payment_wrong_exchange_rate\"\
+ ,\n \"card_swallowed\",\n \"cash_withdrawal_charge\",\n \"cash_withdrawal_not_recognised\"\
+ ,\n \"change_pin\",\n \"compromised_card\",\n \"contactless_not_working\"\
+ ,\n \"country_support\",\n \"declined_card_payment\",\n \"declined_cash_withdrawal\"\
+ ,\n \"declined_transfer\",\n \"direct_debit_payment_not_recognised\",\n \"\
+ disposable_card_limits\",\n \"edit_personal_details\",\n \"exchange_charge\"\
+ ,\n \"exchange_rate\",\n \"exchange_via_app\",\n \"extra_charge_on_statement\"\
+ ,\n \"failed_transfer\",\n \"fiat_currency_support\",\n \"get_disposable_virtual_card\"\
+ ,\n \"get_physical_card\",\n \"getting_spare_card\",\n \"getting_virtual_card\"\
+ ,\n \"lost_or_stolen_card\",\n \"lost_or_stolen_phone\",\n \"order_physical_card\"\
+ ,\n \"passcode_forgotten\",\n \"pending_card_payment\",\n \"pending_cash_withdrawal\"\
+ ,\n \"pending_top_up\",\n \"pending_transfer\",\n \"pin_blocked\",\n \"\
+ receiving_money\",\n \"Refund_not_showing_up\",\n \"request_refund\",\n \"\
+ reverted_card_payment?\",\n \"supported_cards_and_currencies\",\n \"terminate_account\"\
+ ,\n \"top_up_by_bank_transfer_charge\",\n \"top_up_by_card_charge\",\n \"\
+ top_up_by_cash_or_cheque\",\n \"top_up_failed\",\n \"top_up_limits\",\n \"\
+ top_up_reverted\",\n \"topping_up_by_card\",\n \"transaction_charged_twice\"\
+ ,\n \"transfer_fee_charged\",\n \"transfer_into_account\",\n \"transfer_not_received_by_recipient\"\
+ ,\n \"transfer_timing\",\n \"unable_to_verify_identity\",\n \"verify_my_identity\"\
+ ,\n \"verify_source_of_funds\",\n \"verify_top_up\",\n \"virtual_card_not_working\"\
+ ,\n \"visa_or_mastercard\",\n \"why_verify_identity\",\n \"wrong_amount_of_cash_received\"\
+ ,\n \"wrong_exchange_rate_for_cash_withdrawal\"\n] [label].replace(\"_\", \"\
+ \ \")\n}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: topic
+ reference: ''
+ 2520f6d0-fcdf-44b6-abb3-a76e44948047: !Template
+ answer_choices: null
+ id: 2520f6d0-fcdf-44b6-abb3-a76e44948047
+ jinja: "To which department in the bank can this query be directed?\n\n{{text}}\
+ \ |||\n{{\n[\n \"activate_my_card\",\n \"age_limit\",\n \"apple_pay_or_google_pay\"\
+ ,\n \"atm_support\",\n \"automatic_top_up\",\n \"balance_not_updated_after_bank_transfer\"\
+ ,\n \"balance_not_updated_after_cheque_or_cash_deposit\",\n \"beneficiary_not_allowed\"\
+ ,\n \"cancel_transfer\",\n \"card_about_to_expire\",\n \"card_acceptance\"\
+ ,\n \"card_arrival\",\n \"card_delivery_estimate\",\n \"card_linking\",\n\
+ \ \"card_not_working\",\n \"card_payment_fee_charged\",\n \"card_payment_not_recognised\"\
+ ,\n \"card_payment_wrong_exchange_rate\",\n \"card_swallowed\",\n \"cash_withdrawal_charge\"\
+ ,\n \"cash_withdrawal_not_recognised\",\n \"change_pin\",\n \"compromised_card\"\
+ ,\n \"contactless_not_working\",\n \"country_support\",\n \"declined_card_payment\"\
+ ,\n \"declined_cash_withdrawal\",\n \"declined_transfer\",\n \"direct_debit_payment_not_recognised\"\
+ ,\n \"disposable_card_limits\",\n \"edit_personal_details\",\n \"exchange_charge\"\
+ ,\n \"exchange_rate\",\n \"exchange_via_app\",\n \"extra_charge_on_statement\"\
+ ,\n \"failed_transfer\",\n \"fiat_currency_support\",\n \"get_disposable_virtual_card\"\
+ ,\n \"get_physical_card\",\n \"getting_spare_card\",\n \"getting_virtual_card\"\
+ ,\n \"lost_or_stolen_card\",\n \"lost_or_stolen_phone\",\n \"order_physical_card\"\
+ ,\n \"passcode_forgotten\",\n \"pending_card_payment\",\n \"pending_cash_withdrawal\"\
+ ,\n \"pending_top_up\",\n \"pending_transfer\",\n \"pin_blocked\",\n \"\
+ receiving_money\",\n \"Refund_not_showing_up\",\n \"request_refund\",\n \"\
+ reverted_card_payment?\",\n \"supported_cards_and_currencies\",\n \"terminate_account\"\
+ ,\n \"top_up_by_bank_transfer_charge\",\n \"top_up_by_card_charge\",\n \"\
+ top_up_by_cash_or_cheque\",\n \"top_up_failed\",\n \"top_up_limits\",\n \"\
+ top_up_reverted\",\n \"topping_up_by_card\",\n \"transaction_charged_twice\"\
+ ,\n \"transfer_fee_charged\",\n \"transfer_into_account\",\n \"transfer_not_received_by_recipient\"\
+ ,\n \"transfer_timing\",\n \"unable_to_verify_identity\",\n \"verify_my_identity\"\
+ ,\n \"verify_source_of_funds\",\n \"verify_top_up\",\n \"virtual_card_not_working\"\
+ ,\n \"visa_or_mastercard\",\n \"why_verify_identity\",\n \"wrong_amount_of_cash_received\"\
+ ,\n \"wrong_exchange_rate_for_cash_withdrawal\"\n] [label] | replace(\"_\"\
+ , \" \")\n}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: department
+ reference: ''
+ 9482bce0-f201-451b-9384-af588d707629: !Template
+ answer_choices: null
+ id: 9482bce0-f201-451b-9384-af588d707629
+ jinja: "\n{% set li = [ \"activate_my_card\",\n \"age_limit\",\n \"apple_pay_or_google_pay\"\
+ ,\n \"atm_support\",\n \"automatic_top_up\",\n \"balance_not_updated_after_bank_transfer\"\
+ ,\n \"balance_not_updated_after_cheque_or_cash_deposit\",\n \"beneficiary_not_allowed\"\
+ ,\n \"cancel_transfer\",\n \"card_about_to_expire\",\n \"card_acceptance\"\
+ ,\n \"card_arrival\",\n \"card_delivery_estimate\",\n \"card_linking\",\n\
+ \ \"card_not_working\",\n \"card_payment_fee_charged\",\n \"card_payment_not_recognised\"\
+ ,\n \"card_payment_wrong_exchange_rate\",\n \"card_swallowed\",\n \"cash_withdrawal_charge\"\
+ ,\n \"cash_withdrawal_not_recognised\",\n \"change_pin\",\n \"compromised_card\"\
+ ,\n \"contactless_not_working\",\n \"country_support\",\n \"declined_card_payment\"\
+ ,\n \"declined_cash_withdrawal\",\n \"declined_transfer\",\n \"direct_debit_payment_not_recognised\"\
+ ,\n \"disposable_card_limits\",\n \"edit_personal_details\",\n \"exchange_charge\"\
+ ,\n \"exchange_rate\",\n \"exchange_via_app\",\n \"extra_charge_on_statement\"\
+ ,\n \"failed_transfer\",\n \"fiat_currency_support\",\n \"get_disposable_virtual_card\"\
+ ,\n \"get_physical_card\",\n \"getting_spare_card\",\n \"getting_virtual_card\"\
+ ,\n \"lost_or_stolen_card\",\n \"lost_or_stolen_phone\",\n \"order_physical_card\"\
+ ,\n \"passcode_forgotten\",\n \"pending_card_payment\",\n \"pending_cash_withdrawal\"\
+ ,\n \"pending_top_up\",\n \"pending_transfer\",\n \"pin_blocked\",\n \"\
+ receiving_money\",\n \"Refund_not_showing_up\",\n \"request_refund\",\n \"\
+ reverted_card_payment?\",\n \"supported_cards_and_currencies\",\n \"terminate_account\"\
+ ,\n \"top_up_by_bank_transfer_charge\",\n \"top_up_by_card_charge\",\n \"\
+ top_up_by_cash_or_cheque\",\n \"top_up_failed\",\n \"top_up_limits\",\n \"\
+ top_up_reverted\",\n \"topping_up_by_card\",\n \"transaction_charged_twice\"\
+ ,\n \"transfer_fee_charged\",\n \"transfer_into_account\",\n \"transfer_not_received_by_recipient\"\
+ ,\n \"transfer_timing\",\n \"unable_to_verify_identity\",\n \"verify_my_identity\"\
+ ,\n \"verify_source_of_funds\",\n \"verify_top_up\",\n \"virtual_card_not_working\"\
+ ,\n \"visa_or_mastercard\",\n \"why_verify_identity\",\n \"wrong_amount_of_cash_received\"\
+ ,\n \"wrong_exchange_rate_for_cash_withdrawal\"\n] %}\n\nTo which department\
+ \ ({{li|join(\", \")|replace(\"_\", \" \")}}) in the bank can this query be\
+ \ directed?\n\n{{text}} |||\n{{ li [label] | replace(\"_\", \" \")}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: department_options
+ reference: ''
+ e629d77c-46f9-4e00-b23a-c522d07a9943: !Template
+ answer_choices: null
+ id: e629d77c-46f9-4e00-b23a-c522d07a9943
+ jinja: "Summarise the following query in the form of key banking terms\n\n{{text}}\
+ \ |||\n{{\n[\n \"activate_my_card\",\n \"age_limit\",\n \"apple_pay_or_google_pay\"\
+ ,\n \"atm_support\",\n \"automatic_top_up\",\n \"balance_not_updated_after_bank_transfer\"\
+ ,\n \"balance_not_updated_after_cheque_or_cash_deposit\",\n \"beneficiary_not_allowed\"\
+ ,\n \"cancel_transfer\",\n \"card_about_to_expire\",\n \"card_acceptance\"\
+ ,\n \"card_arrival\",\n \"card_delivery_estimate\",\n \"card_linking\",\n\
+ \ \"card_not_working\",\n \"card_payment_fee_charged\",\n \"card_payment_not_recognised\"\
+ ,\n \"card_payment_wrong_exchange_rate\",\n \"card_swallowed\",\n \"cash_withdrawal_charge\"\
+ ,\n \"cash_withdrawal_not_recognised\",\n \"change_pin\",\n \"compromised_card\"\
+ ,\n \"contactless_not_working\",\n \"country_support\",\n \"declined_card_payment\"\
+ ,\n \"declined_cash_withdrawal\",\n \"declined_transfer\",\n \"direct_debit_payment_not_recognised\"\
+ ,\n \"disposable_card_limits\",\n \"edit_personal_details\",\n \"exchange_charge\"\
+ ,\n \"exchange_rate\",\n \"exchange_via_app\",\n \"extra_charge_on_statement\"\
+ ,\n \"failed_transfer\",\n \"fiat_currency_support\",\n \"get_disposable_virtual_card\"\
+ ,\n \"get_physical_card\",\n \"getting_spare_card\",\n \"getting_virtual_card\"\
+ ,\n \"lost_or_stolen_card\",\n \"lost_or_stolen_phone\",\n \"order_physical_card\"\
+ ,\n \"passcode_forgotten\",\n \"pending_card_payment\",\n \"pending_cash_withdrawal\"\
+ ,\n \"pending_top_up\",\n \"pending_transfer\",\n \"pin_blocked\",\n \"\
+ receiving_money\",\n \"Refund_not_showing_up\",\n \"request_refund\",\n \"\
+ reverted_card_payment?\",\n \"supported_cards_and_currencies\",\n \"terminate_account\"\
+ ,\n \"top_up_by_bank_transfer_charge\",\n \"top_up_by_card_charge\",\n \"\
+ top_up_by_cash_or_cheque\",\n \"top_up_failed\",\n \"top_up_limits\",\n \"\
+ top_up_reverted\",\n \"topping_up_by_card\",\n \"transaction_charged_twice\"\
+ ,\n \"transfer_fee_charged\",\n \"transfer_into_account\",\n \"transfer_not_received_by_recipient\"\
+ ,\n \"transfer_timing\",\n \"unable_to_verify_identity\",\n \"verify_my_identity\"\
+ ,\n \"verify_source_of_funds\",\n \"verify_top_up\",\n \"virtual_card_not_working\"\
+ ,\n \"visa_or_mastercard\",\n \"why_verify_identity\",\n \"wrong_amount_of_cash_received\"\
+ ,\n \"wrong_exchange_rate_for_cash_withdrawal\"\n][label].replace(\"_\", \"\
+ \ \")\n}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: rephrase
+ reference: ''
+ edd67883-0386-4496-af7f-37a44c41293f: !Template
+ answer_choices: null
+ id: edd67883-0386-4496-af7f-37a44c41293f
+ jinja: "\n{% set li = [ \"activate_my_card\",\n \"age_limit\",\n \"apple_pay_or_google_pay\"\
+ ,\n \"atm_support\",\n \"automatic_top_up\",\n \"balance_not_updated_after_bank_transfer\"\
+ ,\n \"balance_not_updated_after_cheque_or_cash_deposit\",\n \"beneficiary_not_allowed\"\
+ ,\n \"cancel_transfer\",\n \"card_about_to_expire\",\n \"card_acceptance\"\
+ ,\n \"card_arrival\",\n \"card_delivery_estimate\",\n \"card_linking\",\n\
+ \ \"card_not_working\",\n \"card_payment_fee_charged\",\n \"card_payment_not_recognised\"\
+ ,\n \"card_payment_wrong_exchange_rate\",\n \"card_swallowed\",\n \"cash_withdrawal_charge\"\
+ ,\n \"cash_withdrawal_not_recognised\",\n \"change_pin\",\n \"compromised_card\"\
+ ,\n \"contactless_not_working\",\n \"country_support\",\n \"declined_card_payment\"\
+ ,\n \"declined_cash_withdrawal\",\n \"declined_transfer\",\n \"direct_debit_payment_not_recognised\"\
+ ,\n \"disposable_card_limits\",\n \"edit_personal_details\",\n \"exchange_charge\"\
+ ,\n \"exchange_rate\",\n \"exchange_via_app\",\n \"extra_charge_on_statement\"\
+ ,\n \"failed_transfer\",\n \"fiat_currency_support\",\n \"get_disposable_virtual_card\"\
+ ,\n \"get_physical_card\",\n \"getting_spare_card\",\n \"getting_virtual_card\"\
+ ,\n \"lost_or_stolen_card\",\n \"lost_or_stolen_phone\",\n \"order_physical_card\"\
+ ,\n \"passcode_forgotten\",\n \"pending_card_payment\",\n \"pending_cash_withdrawal\"\
+ ,\n \"pending_top_up\",\n \"pending_transfer\",\n \"pin_blocked\",\n \"\
+ receiving_money\",\n \"Refund_not_showing_up\",\n \"request_refund\",\n \"\
+ reverted_card_payment?\",\n \"supported_cards_and_currencies\",\n \"terminate_account\"\
+ ,\n \"top_up_by_bank_transfer_charge\",\n \"top_up_by_card_charge\",\n \"\
+ top_up_by_cash_or_cheque\",\n \"top_up_failed\",\n \"top_up_limits\",\n \"\
+ top_up_reverted\",\n \"topping_up_by_card\",\n \"transaction_charged_twice\"\
+ ,\n \"transfer_fee_charged\",\n \"transfer_into_account\",\n \"transfer_not_received_by_recipient\"\
+ ,\n \"transfer_timing\",\n \"unable_to_verify_identity\",\n \"verify_my_identity\"\
+ ,\n \"verify_source_of_funds\",\n \"verify_top_up\",\n \"virtual_card_not_working\"\
+ ,\n \"visa_or_mastercard\",\n \"why_verify_identity\",\n \"wrong_amount_of_cash_received\"\
+ ,\n \"wrong_exchange_rate_for_cash_withdrawal\"\n] %}\n\nWhich intent ({{ li|join(\"\
+ , \")|replace(\"_\", \" \")}}) best represents this banking query?\n\n{{text}}\
+ \ |||\n{{\nli [label] | replace(\"_\", \" \")\n}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: intent_options
+ reference: ''
+ eee2366a-8f0c-4ac3-b9cc-aa038e40f8cb: !Template
+ answer_choices: null
+ id: eee2366a-8f0c-4ac3-b9cc-aa038e40f8cb
+ jinja: "What is the intent of this banking query?\n\n{{text}} |||\n{{\n[\n \"\
+ activate_my_card\",\n \"age_limit\",\n \"apple_pay_or_google_pay\",\n \"\
+ atm_support\",\n \"automatic_top_up\",\n \"balance_not_updated_after_bank_transfer\"\
+ ,\n \"balance_not_updated_after_cheque_or_cash_deposit\",\n \"beneficiary_not_allowed\"\
+ ,\n \"cancel_transfer\",\n \"card_about_to_expire\",\n \"card_acceptance\"\
+ ,\n \"card_arrival\",\n \"card_delivery_estimate\",\n \"card_linking\",\n\
+ \ \"card_not_working\",\n \"card_payment_fee_charged\",\n \"card_payment_not_recognised\"\
+ ,\n \"card_payment_wrong_exchange_rate\",\n \"card_swallowed\",\n \"cash_withdrawal_charge\"\
+ ,\n \"cash_withdrawal_not_recognised\",\n \"change_pin\",\n \"compromised_card\"\
+ ,\n \"contactless_not_working\",\n \"country_support\",\n \"declined_card_payment\"\
+ ,\n \"declined_cash_withdrawal\",\n \"declined_transfer\",\n \"direct_debit_payment_not_recognised\"\
+ ,\n \"disposable_card_limits\",\n \"edit_personal_details\",\n \"exchange_charge\"\
+ ,\n \"exchange_rate\",\n \"exchange_via_app\",\n \"extra_charge_on_statement\"\
+ ,\n \"failed_transfer\",\n \"fiat_currency_support\",\n \"get_disposable_virtual_card\"\
+ ,\n \"get_physical_card\",\n \"getting_spare_card\",\n \"getting_virtual_card\"\
+ ,\n \"lost_or_stolen_card\",\n \"lost_or_stolen_phone\",\n \"order_physical_card\"\
+ ,\n \"passcode_forgotten\",\n \"pending_card_payment\",\n \"pending_cash_withdrawal\"\
+ ,\n \"pending_top_up\",\n \"pending_transfer\",\n \"pin_blocked\",\n \"\
+ receiving_money\",\n \"Refund_not_showing_up\",\n \"request_refund\",\n \"\
+ reverted_card_payment?\",\n \"supported_cards_and_currencies\",\n \"terminate_account\"\
+ ,\n \"top_up_by_bank_transfer_charge\",\n \"top_up_by_card_charge\",\n \"\
+ top_up_by_cash_or_cheque\",\n \"top_up_failed\",\n \"top_up_limits\",\n \"\
+ top_up_reverted\",\n \"topping_up_by_card\",\n \"transaction_charged_twice\"\
+ ,\n \"transfer_fee_charged\",\n \"transfer_into_account\",\n \"transfer_not_received_by_recipient\"\
+ ,\n \"transfer_timing\",\n \"unable_to_verify_identity\",\n \"verify_my_identity\"\
+ ,\n \"verify_source_of_funds\",\n \"verify_top_up\",\n \"virtual_card_not_working\"\
+ ,\n \"visa_or_mastercard\",\n \"why_verify_identity\",\n \"wrong_amount_of_cash_received\"\
+ ,\n \"wrong_exchange_rate_for_cash_withdrawal\"\n] [label].replace(\"_\", \"\
+ \ \")\n}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: intent
+ reference: ''
+ f4e80455-1523-4b91-aacc-249d8c6f0f2a: !Template
+ answer_choices: null
+ id: f4e80455-1523-4b91-aacc-249d8c6f0f2a
+ jinja: "Generate the subject for the email containing this query:\n\n{{text}}\
+ \ |||\n{{\n[\n \"activate_my_card\",\n \"age_limit\",\n \"apple_pay_or_google_pay\"\
+ ,\n \"atm_support\",\n \"automatic_top_up\",\n \"balance_not_updated_after_bank_transfer\"\
+ ,\n \"balance_not_updated_after_cheque_or_cash_deposit\",\n \"beneficiary_not_allowed\"\
+ ,\n \"cancel_transfer\",\n \"card_about_to_expire\",\n \"card_acceptance\"\
+ ,\n \"card_arrival\",\n \"card_delivery_estimate\",\n \"card_linking\",\n\
+ \ \"card_not_working\",\n \"card_payment_fee_charged\",\n \"card_payment_not_recognised\"\
+ ,\n \"card_payment_wrong_exchange_rate\",\n \"card_swallowed\",\n \"cash_withdrawal_charge\"\
+ ,\n \"cash_withdrawal_not_recognised\",\n \"change_pin\",\n \"compromised_card\"\
+ ,\n \"contactless_not_working\",\n \"country_support\",\n \"declined_card_payment\"\
+ ,\n \"declined_cash_withdrawal\",\n \"declined_transfer\",\n \"direct_debit_payment_not_recognised\"\
+ ,\n \"disposable_card_limits\",\n \"edit_personal_details\",\n \"exchange_charge\"\
+ ,\n \"exchange_rate\",\n \"exchange_via_app\",\n \"extra_charge_on_statement\"\
+ ,\n \"failed_transfer\",\n \"fiat_currency_support\",\n \"get_disposable_virtual_card\"\
+ ,\n \"get_physical_card\",\n \"getting_spare_card\",\n \"getting_virtual_card\"\
+ ,\n \"lost_or_stolen_card\",\n \"lost_or_stolen_phone\",\n \"order_physical_card\"\
+ ,\n \"passcode_forgotten\",\n \"pending_card_payment\",\n \"pending_cash_withdrawal\"\
+ ,\n \"pending_top_up\",\n \"pending_transfer\",\n \"pin_blocked\",\n \"\
+ receiving_money\",\n \"Refund_not_showing_up\",\n \"request_refund\",\n \"\
+ reverted_card_payment?\",\n \"supported_cards_and_currencies\",\n \"terminate_account\"\
+ ,\n \"top_up_by_bank_transfer_charge\",\n \"top_up_by_card_charge\",\n \"\
+ top_up_by_cash_or_cheque\",\n \"top_up_failed\",\n \"top_up_limits\",\n \"\
+ top_up_reverted\",\n \"topping_up_by_card\",\n \"transaction_charged_twice\"\
+ ,\n \"transfer_fee_charged\",\n \"transfer_into_account\",\n \"transfer_not_received_by_recipient\"\
+ ,\n \"transfer_timing\",\n \"unable_to_verify_identity\",\n \"verify_my_identity\"\
+ ,\n \"verify_source_of_funds\",\n \"verify_top_up\",\n \"virtual_card_not_working\"\
+ ,\n \"visa_or_mastercard\",\n \"why_verify_identity\",\n \"wrong_amount_of_cash_received\"\
+ ,\n \"wrong_exchange_rate_for_cash_withdrawal\"\n][label].replace(\"_\", \"\
+ \ \")\n}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: generate_subject
+ reference: ''
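
Every banking77 template inlines the same 77 intent names and maps the integer `label` to a readable string; the `_options` variants also join the list into the prompt. The core mapping, condensed here to three of the 77 names:

```python
labels = ["activate_my_card", "age_limit", "apple_pay_or_google_pay"]  # truncated list
label = 1
print(labels[label].replace("_", " "))      # age limit
print(", ".join(labels).replace("_", " "))  # prompt-side option listing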
promptsource/templates/billsum/templates.yaml ADDED
@@ -0,0 +1,104 @@
+ dataset: billsum
+ templates:
+ 3ac01292-4a54-4546-b4e6-c225ae114213: !Template
+ answer_choices: null
+ id: 3ac01292-4a54-4546-b4e6-c225ae114213
+ jinja: 'Summarize: {{text}}|||
+
+ Title: {{title}}
+
+ Summary: {{summary}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: 'Summarize: (text-> title,summary)'
+ reference: ''
+ 3c790ac3-0557-47a9-9b71-1cb435f15629: !Template
+ answer_choices: null
+ id: 3c790ac3-0557-47a9-9b71-1cb435f15629
+ jinja: 'Summarize this bill: {{text}} |||
+
+ {{title}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: 'Summarize this bill in one sentence: (text-> title)'
+ reference: ''
+ 438192e5-d67a-4098-9d82-a9fe892f6be2: !Template
+ answer_choices: null
+ id: 438192e5-d67a-4098-9d82-a9fe892f6be2
+ jinja: 'Write a bill: {{summary}} |||
+
+ {{text}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: 'Write a bill: (summary-> text)'
+ reference: ''
+ 4891a8e7-258c-41e2-80d3-0c1a054acb07: !Template
+ answer_choices: null
+ id: 4891a8e7-258c-41e2-80d3-0c1a054acb07
+ jinja: 'Write a bill: {{title}} |||
+
+ {{text}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: 'Write a bill: (title-> text)'
+ reference: ''
+ 550fa161-af4e-4430-9844-ce7dad587733: !Template
+ answer_choices: null
+ id: 550fa161-af4e-4430-9844-ce7dad587733
+ jinja: 'Summarize this bill: {{text}} |||
+
+ {{summary}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: 'Summarize this bill: (text-> summary)'
+ reference: ''
+ 5d2404b9-63ff-406e-977d-eda6afb5c689: !Template
+ answer_choices: null
+ id: 5d2404b9-63ff-406e-977d-eda6afb5c689
+ jinja: '{{summary}}
+
+ ===
+
+ Generate title from summary:
+
+ |||{{title}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: Generate title from summary
+ reference: ''
+ 6a439a80-4924-49e9-b5ae-f661683b399f: !Template
+ answer_choices: null
+ id: 6a439a80-4924-49e9-b5ae-f661683b399f
+ jinja: 'Summarize: {{text}}
+
+ |||{{summary}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: 'Summarize: (text -> summary )'
+ reference: ''
+ ea9f0376-6cec-450c-b258-89f479cb9f6d: !Template
+ answer_choices: null
+ id: ea9f0376-6cec-450c-b258-89f479cb9f6d
+ jinja: 'Summarize: {{summary}}
+
+ |||{{title}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: 'Summarize: (summary -> title)'
+ reference: ''
promptsource/templates/bing_coronavirus_query_set/templates.yaml ADDED
@@ -0,0 +1,72 @@
+ dataset: bing_coronavirus_query_set
+ templates:
+ 43332782-9e92-4bb2-94bf-28759f3fe181: !Template
+ answer_choices: null
+ id: 43332782-9e92-4bb2-94bf-28759f3fe181
+ jinja: "This search query talks about the coronavirus and was published on {{Date}}.\
+ \ In what country was it issued? \n{{Query}}\n|||\n{{Country}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: what_country
+ reference: ''
+ 68f9c063-1907-4866-ab1b-756cc57e5695: !Template
+ answer_choices: null
+ id: 68f9c063-1907-4866-ab1b-756cc57e5695
+ jinja: "The user is searching for coronavirus results on Bing.com. Is the intent\
+ \ implicit or explicit? \n{{Query}}\n|||\n{% if IsImplicitIntent == \"True\"\
+ \ %}\nimplicit\n{% else %}\nexplicit\n{% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: is_implicit_or_explicit
+ reference: ''
+ 992d541f-9e0c-466d-b4c4-92e9e236f863: !Template
+ answer_choices: null
+ id: 992d541f-9e0c-466d-b4c4-92e9e236f863
+ jinja: "This search query about coronavirus was issued in {{Country}} on {{Date}}.\
+ \ Is the intent implicit or explicit? \n{{Query}}\n|||\n{% if IsImplicitIntent\
+ \ == \"True\" %}\nimplicit\n{% else %}\nexplicit\n{% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: is_explicit_country_date
+ reference: ''
+ d4a251d7-0e23-4feb-8bf2-18e32c553199: !Template
+ answer_choices: null
+ id: d4a251d7-0e23-4feb-8bf2-18e32c553199
+ jinja: "On what date was this search engine query issued during the Covid-19\
+ \ pandemic? \n{{Query}}\n|||\n{{Date}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: what_date
+ reference: ''
+ df53652c-36dc-45fe-a015-d0781e32cd33: !Template
+ answer_choices: null
+ id: df53652c-36dc-45fe-a015-d0781e32cd33
+ jinja: "Does this search engine query have an indirect relation to Covid-19?\
+ \ \n{{Query}}\n|||\n{% if IsImplicitIntent == \"True\" %}\nYes\n{% else %}\n\
+ No\n{% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: is_implicit_query
+ reference: ''
+ df7bc2ee-686c-4826-ad84-3a056a2da4d4: !Template
+ answer_choices: null
+ id: df7bc2ee-686c-4826-ad84-3a056a2da4d4
+ jinja: "Does this search query on Bing.com talk about the coronavirus explicitly?\
+ \ \n{{Query}}\n|||\n{% if IsImplicitIntent == \"True\" %}\nNo\n{% else %}\n\
+ Yes\n{% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: is_explicit_query
+ reference: ''
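
Note that all the intent templates compare `IsImplicitIntent` against the string "True" rather than a boolean, implying the field is stored as text in this dataset. Sketch of the branch:

```python
from jinja2 import Template

src = '{% if IsImplicitIntent == "True" %}implicit{% else %}explicit{% endif %}'
print(Template(src).render(IsImplicitIntent="True"))   # implicit
print(Template(src).render(IsImplicitIntent="False"))  # explicit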
promptsource/templates/blended_skill_talk/templates.yaml ADDED
@@ -0,0 +1,46 @@
+ dataset: blended_skill_talk
+ templates:
+ 54f785e9-453a-4ffe-8181-28095e3f2b80: !Template
+ answer_choices: null
+ id: 54f785e9-453a-4ffe-8181-28095e3f2b80
+ jinja: "Given the below conversation between two people, what would the listener\
+ \ say?\n\nA: {{previous_utterance[0]}}\n\nB: {{previous_utterance[1]}}\n\n{%\
+ \ for message_f, message_g in zip(free_messages[:-1], guided_messages[:-1])\
+ \ %}\nA: {{message_f}}\n\nB: {{message_g}}\n{% endfor %} \n\nA: {{free_messages[-1]}}\n\
+ \nB: \n|||\n{{guided_messages[-1]}}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: guess-last-utterance
+ reference: ''
+ 58f4e068-26fa-4843-a1d6-54bde324e780: !Template
+ answer_choices: null
+ id: 58f4e068-26fa-4843-a1d6-54bde324e780
+ jinja: "Two people are having a conversation. Are the utterances in the correct\
+ \ order?\n{% if range(0, 2) | choice %}\nA: {{previous_utterance[0]}}\n\nB:\
+ \ {{previous_utterance[1]}}\n\n{% for message_f, message_g in zip(free_messages,\
+ \ guided_messages) %}\nA: {{message_f}}\n\nB: {{message_g}}\n{% endfor %} \n\
+ \n|||\nYes, they are.\n{% else %}\nA: {{previous_utterance[1]}}\n\nB: {{previous_utterance[0]}}\n\
+ \n{% for message_f, message_g in zip(guided_messages, free_messages) %}\nA:\
+ \ {{message_f}}\n\nB: {{message_g}}\n{% endfor %} \n\n|||\nNo, they are not.\n\
+ {% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: guess-correct-order
+ reference: ''
+ 8792b63e-7217-40fe-8130-7392baca3519: !Template
+ answer_choices: null
+ id: 8792b63e-7217-40fe-8130-7392baca3519
+ jinja: "Two people are talking to each other. What do you think Person A said\
+ \ in the beginning?\n\nPerson B: {{previous_utterance[1]}}\n\n{% for message_f,\
+ \ message_g in zip(free_messages, guided_messages) %}\nPerson A: {{message_f}}\n\
+ \nPerson B: {{message_g}}\n{% endfor %} \n|||\n{{previous_utterance[0]}}\n"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: false
+ name: guess-first-utterance
+ reference: ''
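
These templates call `zip(...)` inside Jinja, which is not available by default; promptsource presumably injects it into the template environment. A stand-in that makes the loops above runnable (the `env.globals` injection is an assumption, not promptsource's verified setup):

```python
from jinja2 import Environment

env = Environment()
env.globals["zip"] = zip  # assumption: promptsource exposes Python's zip like this
src = "{% for a, b in zip(xs, ys) %}A: {{a}}\nB: {{b}}\n{% endfor %}"
print(env.from_string(src).render(xs=["Hi there!"], ys=["Hello!"]))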
promptsource/templates/boolq/templates.yaml ADDED
@@ -0,0 +1,99 @@
+ dataset: boolq
+ templates:
+ 9bd5fbaa-e7a2-4847-a7a1-500591d90bb4: !Template
+ answer_choices: null
+ id: 9bd5fbaa-e7a2-4847-a7a1-500591d90bb4
+ jinja: '{{passage}} {{question}}? |||
+
+ {% if answer == true %}
+
+ Yes
+
+ {% else %}
+
+ No
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: LM style
+ reference: Concatenate passage and question. Transform True/False into Yes/No.
+ c746b16d-212d-4f1f-9988-9fee99584f25: !Template
+ answer_choices: null
+ id: c746b16d-212d-4f1f-9988-9fee99584f25
+ jinja: '{{passage}}
+
+ Question: {{question}}?
+
+ Answer: |||
+
+ {% if answer == true %}
+
+ Yes
+
+ {% else %}
+
+ No
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: Boolq GPT3
+ reference: Taken from GPT3 - Figure G29
+ dc7caf4f-b109-4a82-86a0-2798a5437283: !Template
+ answer_choices: null
+ id: dc7caf4f-b109-4a82-86a0-2798a5437283
+ jinja: '{{passage}}
+
+ {{question}}?
+
+ Answer by yes or no. |||
+
+ {% if answer == true %}
+
+ Yes
+
+ {% else %}
+
+ No
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: yes/no
+ reference: Yes or no
+ fbba0375-4220-4483-8bbe-0fd630330611: !Template
+ answer_choices: null
+ id: fbba0375-4220-4483-8bbe-0fd630330611
+ jinja: 'Answer the question based on the passage.
+
+ ===
+
+ Question: {{question}}?
+
+ Passage: {{passage}}
+
+ Answer: |||
+
+ {% if answer == true %}
+
+ Yes
+
+ {% else %}
+
+ No
+
+ {% endif %}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: Exercise style
+ reference: Prompt in the style of task description + instance. Mapped True/False
+ into Yes/No
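
boolq's `answer` field is a real boolean, so these templates branch on `answer == true` instead of indexing an `answer_choices` list. Sketch of the mapping:

```python
from jinja2 import Template

src = "{% if answer == true %}Yes{% else %}No{% endif %}"
print(Template(src).render(answer=True))   # Yes
print(Template(src).render(answer=False))  # No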
promptsource/templates/cbt/CN/templates.yaml ADDED
@@ -0,0 +1,45 @@
+ dataset: cbt
+ subset: CN
+ templates:
+ 0725fe5e-1bba-4e08-a448-9e0038164914: !Template
+ answer_choices: null
+ id: 0725fe5e-1bba-4e08-a448-9e0038164914
+ jinja: 'Write the next sentence of this story: {{sentences | join('''')}}
+
+ |||
+
+ {{ question.replace("XXXXX", answer) }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: next_sentence_generation
+ reference: Generate the next sentence given the story.
+ 2c326181-dbba-401e-accb-d84ea0162f0a: !Template
+ answer_choices: null
+ id: 2c326181-dbba-401e-accb-d84ea0162f0a
+ jinja: 'Read the passage and fill in the XXXXX:
+
+ {{ sentences | join('''') }} {{question}}
+
+ |||
+
+ {{ answer }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: answer_prediction
+ reference: Fill in the blank without options.
+ b26cae56-1fbd-47a5-8c8d-d981ca098239: !Template
+ answer_choices: null
+ id: b26cae56-1fbd-47a5-8c8d-d981ca098239
+ jinja: "Which of the following options best replaces XXXXX?\n{{ options |\
+ \ join (\", \") }}\nin this story: \n{{sentences | join ('')}} {{question}}\n\
+ |||\n{{ answer }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: multi_choice
+ reference: Given the sentences, fill the blank using the options.
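
`next_sentence_generation` reconstructs the held-out sentence by substituting the gold `answer` back into the `XXXXX` placeholder of `question` (the same pattern recurs in the NE, P, and V subsets below). The underlying string operation, with toy values:

```python
question = "The XXXXX jumped over the fence ."  # toy cloze sentence
answer = "dog"
print(question.replace("XXXXX", answer))  # The dog jumped over the fence .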
promptsource/templates/cbt/NE/templates.yaml ADDED
@@ -0,0 +1,45 @@
+ dataset: cbt
+ subset: NE
+ templates:
+ 1fd986ce-e44d-4f32-bbb8-f5d4d3d930d9: !Template
+ answer_choices: null
+ id: 1fd986ce-e44d-4f32-bbb8-f5d4d3d930d9
+ jinja: "Which of the following options best replaces XXXXX?\n{{ options |\
+ \ join (\", \") }}\nin this story: \n{{sentences | join ('')}} {{question}}\n\
+ |||\n{{ answer }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: multi_choice
+ reference: Given the sentences, fill the blank using the options.
+ 3c56e28d-668a-42d0-8976-93864e38bc4c: !Template
+ answer_choices: null
+ id: 3c56e28d-668a-42d0-8976-93864e38bc4c
+ jinja: 'Read the passage and fill in the XXXXX:
+
+ {{ sentences | join('''') }} {{question}}
+
+ |||
+
+ {{ answer }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: answer_prediction
+ reference: Fill in the blank without options.
+ d2f4dcdd-232e-4e56-a9e1-1aed294e651f: !Template
+ answer_choices: null
+ id: d2f4dcdd-232e-4e56-a9e1-1aed294e651f
+ jinja: 'Write the next sentence of this story: {{sentences | join('''')}}
+
+ |||
+
+ {{ question.replace("XXXXX", answer) }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: next_sentence_generation
+ reference: Generate the next sentence given the story.
promptsource/templates/cbt/P/templates.yaml ADDED
@@ -0,0 +1,45 @@
+ dataset: cbt
+ subset: P
+ templates:
+ 0c217578-64bb-431d-af5b-8944582a49f2: !Template
+ answer_choices: null
+ id: 0c217578-64bb-431d-af5b-8944582a49f2
+ jinja: 'Read the passage and fill in the XXXXX:
+
+ {{ sentences | join('''') }} {{question}}
+
+ |||
+
+ {{ answer }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: answer_prediction
+ reference: Fill in the blank without options.
+ 3753a293-98ba-4f98-9bb9-96b86aa0b719: !Template
+ answer_choices: null
+ id: 3753a293-98ba-4f98-9bb9-96b86aa0b719
+ jinja: "Which of the following options best replaces XXXXX?\n{{ options |\
+ \ join (\", \") }}\nin this story: \n{{sentences | join ('')}} {{question}}\n\
+ |||\n{{ answer }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: multi_choice
+ reference: Given the sentences, fill the blank using the options.
+ e7a60793-f142-44e2-9fab-b39ba3236106: !Template
+ answer_choices: null
+ id: e7a60793-f142-44e2-9fab-b39ba3236106
+ jinja: 'Write the next sentence of this story: {{sentences | join('''')}}
+
+ |||
+
+ {{ question.replace("XXXXX", answer) }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: next_sentence_generation
+ reference: Generate the next sentence given the story.
promptsource/templates/cbt/V/templates.yaml ADDED
@@ -0,0 +1,45 @@
+ dataset: cbt
+ subset: V
+ templates:
+ 08820238-5bb3-4c7c-98bb-ec3d81e432e7: !Template
+ answer_choices: null
+ id: 08820238-5bb3-4c7c-98bb-ec3d81e432e7
+ jinja: 'Write the next sentence of this story: {{sentences | join('''')}}
+
+ |||
+
+ {{ question.replace("XXXXX", answer) }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: next_sentence_generation
+ reference: Generate the next sentence given the story.
+ 63bfa7b6-b566-4693-848c-e05cd7a12a03: !Template
+ answer_choices: null
+ id: 63bfa7b6-b566-4693-848c-e05cd7a12a03
+ jinja: 'Read the passage and fill in the XXXXX:
+
+ {{ sentences | join('''') }} {{question}}
+
+ |||
+
+ {{ answer }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: answer_prediction
+ reference: Fill in the blank without options.
+ a2e38459-90d9-4292-9d96-491ad7d4e3db: !Template
+ answer_choices: null
+ id: a2e38459-90d9-4292-9d96-491ad7d4e3db
+ jinja: "Which of the following options best replaces XXXXX?\n{{ options |\
+ \ join (\", \") }}\nin this story: \n{{sentences | join ('')}} {{question}}\n\
+ |||\n{{ answer }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: multi_choice
+ reference: Given the sentences, fill in the blanks using the options.
promptsource/templates/cbt/raw/templates.yaml ADDED
@@ -0,0 +1,32 @@
+ dataset: cbt
+ subset: raw
+ templates:
+ 2d9e9c74-550e-4838-8d1d-a804d74828f7: !Template
+ answer_choices: null
+ id: 2d9e9c74-550e-4838-8d1d-a804d74828f7
+ jinja: 'Write a story for this title: {{title.split(''___'')[1].split(''.'')[0].replace(''_'',''
+ '')}}
+
+ |||
+
+ {{ content }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: write_story
+ reference: Given the title, write a story.
+ f4e1d9bb-a43e-4c75-aa5d-4711090dd628: !Template
+ answer_choices: null
+ id: f4e1d9bb-a43e-4c75-aa5d-4711090dd628
+ jinja: 'Write a title for this story: {{ content }}
+
+ |||
+
+ {{title.split(''___'')[1].split(''.'')[0].replace(''_'','' '')}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: write_title
+ reference: Given the story, write a title.
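Both cbt/raw templates share one title-cleaning chain. A step-by-step sketch, assuming raw titles follow an author___story.txt-style pattern (the pattern is inferred from the split logic, not documented here):

title = "Andrew_Lang___Prince_Prigio.txt.out"  # assumed filename shape
story = title.split("___")[1]    # "Prince_Prigio.txt.out"
story = story.split(".")[0]      # "Prince_Prigio"
story = story.replace("_", " ")  # "Prince Prigio"
print(story)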
promptsource/templates/cc_news/templates.yaml ADDED
@@ -0,0 +1,208 @@
+ dataset: cc_news
+ templates:
+ 0c630a0d-5eeb-46ea-ba15-f76f5d05a57d: !Template
+ answer_choices: null
+ id: 0c630a0d-5eeb-46ea-ba15-f76f5d05a57d
+ jinja: 'What could be the content of a news article with the following title and
+ summary?
+
+
+ Title: {{title}}
+
+
+ Summary: {{description}}
+
+ |||
+
+ {{text}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: text_3
+ reference: ''
+ 0c651168-8729-4a35-8c7c-5d812d4be790: !Template
+ answer_choices: null
+ id: 0c651168-8729-4a35-8c7c-5d812d4be790
+ jinja: "{{ text }} \n\nGive a brief description of the above text.\n|||\n{{ description\
+ \ }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: desc_c_q_1
+ reference: ''
+ 11a681c3-8450-4064-aa08-ad3700b8b1bd: !Template
+ answer_choices: null
+ id: 11a681c3-8450-4064-aa08-ad3700b8b1bd
+ jinja: '{{ text }}
+
+
+ What title would you choose for the text above?
+
+ |||
+
+ {{ title }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: title_c_q_2
+ reference: ''
+ 14aca5f0-89ae-4ae1-9746-7a68f6a0664f: !Template
+ answer_choices: null
+ id: 14aca5f0-89ae-4ae1-9746-7a68f6a0664f
+ jinja: 'Suggest the content of a news article entitled:
+
+
+ {{ title }},
+
+
+ regarding:
+
+
+ {{ description }}
+
+ |||
+
+ {{ text }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: text_1
+ reference: ''
+ 319a6d41-d6bb-4f8f-ba1b-085a45b3eddd: !Template
+ answer_choices: null
+ id: 319a6d41-d6bb-4f8f-ba1b-085a45b3eddd
+ jinja: "Write a brief summary of the text below: \n\n{{ text }}\n|||\n{{ description\
+ \ }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: desc_q_c_3
+ reference: ''
+ 5ca5100e-7aa6-48c0-9e78-48914739dc90: !Template
+ answer_choices: null
+ id: 5ca5100e-7aa6-48c0-9e78-48914739dc90
+ jinja: 'Use the description below to write a news article entitled:
+
+ {{ title }}.
+
+
+ Description: {{ description }}
+
+ |||
+
+ {{ text }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: text_4
+ reference: ''
+ 7fd214bd-2403-42aa-850f-5255771e5609: !Template
+ answer_choices: null
+ id: 7fd214bd-2403-42aa-850f-5255771e5609
+ jinja: "Choose a title for the text below: \n\n{{ text }}\n|||\n{{ title }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: title_q_c_2
+ reference: ''
+ 858a02bf-10c0-4284-886e-26a8859b2cc3: !Template
+ answer_choices: null
+ id: 858a02bf-10c0-4284-886e-26a8859b2cc3
+ jinja: '{{ text }}
+
+
+ Summarize the essential ideas of the above piece of news.
+
+ |||
+
+ {{ description }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: desc_c_q_2
+ reference: ''
+ a993713f-fd0e-4d62-99c0-e1313ab5c1c8: !Template
+ answer_choices: null
+ id: a993713f-fd0e-4d62-99c0-e1313ab5c1c8
+ jinja: "{{ text }} \n\nWhat title best suits the above piece of news?\n|||\n{{\
+ \ title }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: title_c_q_1
+ reference: ''
+ ae553815-f631-4e67-a6bc-6d8a21dedb25: !Template
+ answer_choices: null
+ id: ae553815-f631-4e67-a6bc-6d8a21dedb25
+ jinja: "Summarize the essential ideas of the following piece of news: \n\n{{ text\
+ \ }}\n|||\n{{ description }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: desc_q_c_2
+ reference: ''
+ b637cfd7-d4b8-420a-b60b-4fe0aa891000: !Template
+ answer_choices: null
+ id: b637cfd7-d4b8-420a-b60b-4fe0aa891000
+ jinja: 'Write a piece of news expanding the following ideas:
+
+
+ {{ description }},
+
+
+ entitled:
+
+
+ {{ title }}
+
+ |||
+
+ {{ text }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: text_2
+ reference: ''
+ cc13d9b7-041a-4b29-b6c4-a6851a21fb46: !Template
+ answer_choices: null
+ id: cc13d9b7-041a-4b29-b6c4-a6851a21fb46
+ jinja: "Give this text a title: \n\n{{ text }}\n|||\n{{ title }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: title_q_c_1
+ reference: ''
+ e4d40d0e-8c38-45ef-97dd-15ebab0b4078: !Template
+ answer_choices: null
+ id: e4d40d0e-8c38-45ef-97dd-15ebab0b4078
+ jinja: "Give a brief description of the following text: \n\n{{ text }}\n|||\n\
+ {{ description }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: desc_q_c_1
+ reference: ''
+ f4a0b21c-fcf1-4e3d-aa59-7cf3b9ae8780: !Template
+ answer_choices: null
+ id: f4a0b21c-fcf1-4e3d-aa59-7cf3b9ae8780
+ jinja: "{{ text }} \n\nThe above text can be summarized as follows:\n|||\n{{ description\
+ \ }}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: true
+ name: desc_c_q_3
+ reference: ''
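All of these files rely on the custom !Template and !TemplateMetadata YAML tags, so a plain yaml.safe_load would reject them; promptsource/templates.py registers proper classes for the tags. For a quick inspection of one file, a throwaway loader that maps both tags onto dicts is enough — a minimal sketch, not the library's actual loader:

import yaml

class InspectLoader(yaml.SafeLoader):
    """Throwaway loader: the custom template tags become plain dicts."""

for tag in ("!Template", "!TemplateMetadata"):
    InspectLoader.add_constructor(
        tag, lambda loader, node: loader.construct_mapping(node, deep=True)
    )

with open("promptsource/templates/cc_news/templates.yaml") as f:
    data = yaml.load(f, Loader=InspectLoader)

for uuid, template in data["templates"].items():
    print(uuid, "->", template["name"])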
promptsource/templates/circa/templates.yaml ADDED
@@ -0,0 +1,91 @@
+ dataset: circa
+ templates:
+ 053260a8-1bcc-4805-81d2-bb528fc56ca2: !Template
+ answer_choices: null
+ id: 053260a8-1bcc-4805-81d2-bb528fc56ca2
+ jinja: 'Convert this question to a declarative sentence asserting an
+ affirmative answer:
+
+
+ {{question_X}} |||
+
+ {{canquestion_X}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ - BLEU
+ - Edit Distance
+ - ROUGE
+ original_task: false
+ name: question_declarative
+ reference: ''
+ 70b7a94a-6a39-4a81-9a6e-0709a0acdb28: !Template
+ answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably\
+ \ yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||\
+ \ Other ||| I am not sure how X will interpret Y\u2019s answer"
+ id: 70b7a94a-6a39-4a81-9a6e-0709a0acdb28
+ jinja: "{% if goldstandard2 != -1 %}\n\nGiven the question-answer pair of X and\
+ \ Y in the context of {{context}}, which of the following answers is Y implying:\
+ \ \"{{\"Yes\"}}\", \"{{\"No\"}}\", \"{{\"In the middle, neither yes nor no\"\
+ }}\", \"{{\"Probably yes / sometimes yes\"}}\", \"{{\"Probably no\"}}\", \"\
+ {{\"Yes, subject to some conditions\"}}\", \"{{\"Other\"}}\" or \"{{\"I am not\
+ \ sure how X will interpret Y\u2019s answer\"}}\"?\n\nX: {{question_X}} \n\n\
+ Y: {{answer_Y}} |||\n\n{{ answer_choices[goldstandard2]}}\n\n{% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: goldstandard2_judgement
+ reference: ''
+ 73466d0f-b1b1-4c61-8f03-346e121ae06c: !Template
+ answer_choices: null
+ id: 73466d0f-b1b1-4c61-8f03-346e121ae06c
+ jinja: 'What is a possible question X could ask Y given the context of {{context}}
+ that would cause Y to answer "{{answer_Y}}"? |||
+
+ {{question_X}}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - BLEU
+ - ROUGE
+ original_task: false
+ name: possible_qn
+ reference: ''
+ 997f7f96-d420-48c1-85f7-ecade54adbd7: !Template
+ answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably\
+ \ yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||\
+ \ Other ||| I am not sure how X will interpret Y\u2019s answer"
+ id: 997f7f96-d420-48c1-85f7-ecade54adbd7
+ jinja: "{% if goldstandard1 != -1 %}\n\nGiven the question-answer pair of X and\
+ \ Y in the context of {{context}}, what answer is Y implying?\n\nX: {{question_X}}\
+ \ \n\nY: {{answer_Y}} |||\n\n{{ answer_choices[goldstandard1]}}\n\n{% endif\
+ \ %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: false
+ metrics:
+ - Accuracy
+ original_task: true
+ name: judgement
+ reference: ''
+ a15c1a30-5ef0-451f-b202-987a16752a0a: !Template
+ answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably\
+ \ yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||\
+ \ Other ||| I am not sure how X will interpret Y\u2019s answer"
+ id: a15c1a30-5ef0-451f-b202-987a16752a0a
+ jinja: "{% if goldstandard1 != -1 %}\n\nGiven the question-answer pair of X and\
+ \ Y in the context of {{context}}, which of the following answers is Y implying:\
+ \ \"{{\"Yes\"}}\", \"{{\"No\"}}\", \"{{\"In the middle, neither yes nor no\"\
+ }}\", \"{{\"Probably yes / sometimes yes\"}}\", \"{{\"Probably no\"}}\", \"\
+ {{\"Yes, subject to some conditions\"}}\", \"{{\"Other\"}}\" or \"{{\"I am not\
+ \ sure how X will interpret Y\u2019s answer\"}}\"?\n\nX: {{question_X}} \n\n\
+ Y: {{answer_Y}} |||\n\n{{ answer_choices[goldstandard1]}}\n\n{% endif %}"
+ metadata: !TemplateMetadata
+ choices_in_prompt: true
+ metrics:
+ - Accuracy
+ original_task: true
+ name: goldstandard1_judgement
+ reference: ''
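In the judgement templates above, the target is simply answer_choices (split on |||) indexed by the integer gold label, and the {% if goldstandard1 != -1 %} guard makes the template render nothing for unlabeled examples. The selection logic in plain Python (example labels invented):

ANSWER_CHOICES = (
    "Yes ||| No ||| In the middle, neither yes nor no ||| "
    "Probably yes / sometimes yes ||| Probably no ||| "
    "Yes, subject to some conditions ||| Other ||| "
    "I am not sure how X will interpret Y's answer"
)
choices = [c.strip() for c in ANSWER_CHOICES.split("|||")]

def target_for(goldstandard):
    if goldstandard == -1:  # mirrors the {% if ... != -1 %} guard: skip example
        return None
    return choices[goldstandard]

print(target_for(3))   # Probably yes / sometimes yes
print(target_for(-1))  # None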
promptsource/templates/climate_fever/templates.yaml ADDED
@@ -0,0 +1,238 @@
+ dataset: climate_fever
+ templates:
+ 38632cd9-7c4c-4e1d-85b3-20e7a78d4580: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: 38632cd9-7c4c-4e1d-85b3-20e7a78d4580
+ jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"support"}},
+ {{"refute"}}, or provide {{"not enough info"}} on climate change?
+
+
+ Statement: {{claim}}
+
+
+ Evidence: {{evidences[0]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[0]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: first_evidence_and_claim_itemization
+ reference: First evidence and claim with simple itemization
+ 3970f474-a9e3-4264-aefa-dd4cfadd279c: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information ||| Disputed
+ id: 3970f474-a9e3-4264-aefa-dd4cfadd279c
+ jinja: 'Here''s a claim and accompanying evidence statements. Do the statements
+ {{"support"}}, {{"refute"}}, {{"dispute"}} or provide {{"not enough info"}}
+ on climate change?
+
+
+ Claim: {{claim}}
+
+
+ Statements:
+
+ - {{ evidences | map(attribute="evidence") | map("trim", "\".") | join(".\n-
+ ") }}.
+
+ |||
+
+ {{ answer_choices[claim_label] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: claim_and_all_supporting_evidences
+ reference: A claim and all supporting evidences provided with the associated claim
+ label
+ 5d5062c1-d28f-4b1c-a7da-9b53796ed39f: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: 5d5062c1-d28f-4b1c-a7da-9b53796ed39f
+ jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"support"}},
+ {{"refute"}}, or provide {{"not enough info"}} on climate change?
+
+
+ Statement: {{claim}}
+
+
+ Evidence: {{evidences[4]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[4]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: fifth_evidence_and_claim_itemization
+ reference: Fifth evidence and claim with simple itemization
+ 82c484bd-2ed7-4ee0-aaee-2b31ac68e751: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: 82c484bd-2ed7-4ee0-aaee-2b31ac68e751
+ jinja: 'Considering the following claim:
+
+ {{claim}}.
+
+ Does the following statement {{"support"}}, {{"refute"}}, or provide {{"not
+ enough info"}} on climate change?
+
+ {{evidences[4]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[4]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: fifth_evidence_claim_pair
+ reference: Relation between the claim and fifth evidence pair.
+ 9ba074a2-fbcf-4f69-bf03-bd16dbdec9cd: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: 9ba074a2-fbcf-4f69-bf03-bd16dbdec9cd
+ jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"support"}},
+ {{"refute"}}, or provide {{"not enough info"}} on climate change?
+
+
+ Statement: {{claim}}
+
+
+ Evidence: {{evidences[3]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[3]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: fourth_evidence_and_claim_itemization
+ reference: Fourth evidence and claim with simple itemization
+ 9f68b883-d6a3-4e95-af2a-b7755bc46ba9: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: 9f68b883-d6a3-4e95-af2a-b7755bc46ba9
+ jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"support"}},
+ {{"refute"}}, or provide {{"not enough info"}} on climate change?
+
+
+ Statement: {{claim}}
+
+
+ Evidence: {{evidences[2]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[2]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: third_evidence_and_claim_itemization
+ reference: Third evidence and claim with simple itemization
+ cb78a363-fd32-4dbd-976f-b56de644ba90: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: cb78a363-fd32-4dbd-976f-b56de644ba90
+ jinja: 'Considering the following claim:
+
+ {{claim}}.
+
+ Does the following statement {{"support"}}, {{"refute"}}, or provide {{"not
+ enough info"}} on climate change?
+
+ {{evidences[1]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[1]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: second_evidence_claim_pair
+ reference: Relation between the claim and second evidence pair.
+ cca7b6f5-29e3-45a4-bc8b-889f5ab2fc13: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: cca7b6f5-29e3-45a4-bc8b-889f5ab2fc13
+ jinja: 'Considering the following claim:
+
+ {{claim}}.
+
+ Does the following statement {{"support"}}, {{"refute"}}, or provide {{"not
+ enough info"}} on climate change?
+
+ {{evidences[0]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[0]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: first_evidence_claim_pair
+ reference: Relation between the claim and first evidence pair.
+ dc3e0a0b-4f4d-4a76-9e7b-eafce4967e98: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: dc3e0a0b-4f4d-4a76-9e7b-eafce4967e98
+ jinja: 'Considering the following claim:
+
+ {{claim}}.
+
+ Does the following statement {{"support"}}, {{"refute"}}, or provide {{"not
+ enough info"}} on climate change?
+
+ {{evidences[3]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[3]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: fourth_evidence_claim_pair
+ reference: Relation between the claim and fourth evidence pair.
+ e3e01825-e256-4098-b7bb-aa07c399e8f6: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: e3e01825-e256-4098-b7bb-aa07c399e8f6
+ jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"support"}},
+ {{"refute"}}, or provide {{"not enough info"}} on climate change?
+
+
+ Statement: {{claim}}
+
+
+ Evidence: {{evidences[1]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[1]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: second_evidence_and_claim_itemization
+ reference: Second evidence and claim with simple itemization
+ ff9c9c11-92f1-4cb2-a73c-d786d58b00e1: !Template
+ answer_choices: Supports ||| Refutes ||| Not enough information
+ id: ff9c9c11-92f1-4cb2-a73c-d786d58b00e1
+ jinja: 'Considering the following claim:
+
+ {{claim}}.
+
+ Does the following statement {{"support"}}, {{"refute"}}, or provide {{"not
+ enough info"}} on climate change?
+
+ {{evidences[2]["evidence"].strip(".").strip(''"'')}}.
+
+ |||
+
+ {{ answer_choices[evidences[2]["evidence_label"]] }}'
+ metadata: !TemplateMetadata
+ choices_in_prompt: null
+ metrics: []
+ original_task: null
+ name: third_evidence_claim_pair
+ reference: Relation between the claim and third evidence pair.
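The claim_and_all_supporting_evidences template above is the only one that touches every evidence record; its filter chain map(attribute="evidence") | map("trim", "\".") | join(".\n- ") has a direct Python equivalent (records invented):

evidences = [  # invented records using the dataset's field names
    {"evidence": '"Sea levels rose about 20 cm over the last century."', "evidence_label": 0},
    {"evidence": '"Satellite data show no warming trend."', "evidence_label": 1},
]

# map(attribute="evidence"): pull the strings
# map("trim", '".'):         strip surrounding quotes and periods
# join(".\n- "):             re-punctuate as a dashed list
items = [e["evidence"].strip('".') for e in evidences]
print("- " + ".\n- ".join(items) + ".")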