Tristan Thrush committed on
Commit
2d74fdd
•
2 Parent(s): 9bb22fc eb4376f

resolved conflict

Files changed (2)
  1. .github/workflows/sync_with_spaces.yml +2 -1
  2. app.py +21 -23
.github/workflows/sync_with_spaces.yml CHANGED
@@ -16,4 +16,5 @@ jobs:
       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: git push https://lewtun:$HF_TOKEN@huggingface.co/spaces/autoevaluate/autoevaluate main
+        run: |
+          git push https://lewtun:$HF_TOKEN@huggingface.co/spaces/autoevaluate/autoevaluate main
app.py CHANGED
@@ -41,12 +41,12 @@ TASK_TO_DEFAULT_METRICS = {
     "summarization": ["rouge1", "rouge2", "rougeL", "rougeLsum", "gen_len"],
 }
 
-supported_tasks = list(TASK_TO_ID.keys())
+SUPPORTED_TASKS = list(TASK_TO_ID.keys())
 
 @st.cache
 def get_supported_metrics():
     metrics = list_metrics()
-    supported_metrics = {}
+    supported_metrics = []
     for metric in tqdm(metrics):
         try:
             metric_func = load_metric(metric)
@@ -71,7 +71,7 @@ def get_supported_metrics():
                 break
 
         if defaults:
-            supported_metrics[metric] = argspec.kwonlydefaults
+            supported_metrics.append(metric)
     return supported_metrics
 
 supported_metrics = get_supported_metrics()
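Note: before the merge, get_supported_metrics stored each metric's keyword-only defaults (argspec.kwonlydefaults); after the merge it only records the metric name and defers to the metric's own defaults. The signature inspection itself sits outside this hunk, so the following is a minimal sketch under the assumption that it targets the metric's compute method:

    import inspect

    from datasets import load_metric

    def metric_keyword_defaults(metric_name):
        # Load the metric and inspect its compute() signature (assumed inspection target).
        metric = load_metric(metric_name)
        argspec = inspect.getfullargspec(metric.compute)
        # FullArgSpec.kwonlydefaults maps keyword-only arguments to their default
        # values, or is None when there are none.
        return argspec.kwonlydefaults or {}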
@@ -102,7 +102,6 @@ selected_dataset = st.selectbox("Select a dataset", all_datasets, index=all_data
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
 
-# TODO: In general this will be a list of multiple configs => need to generalise logic here
 metadata = get_metadata(selected_dataset)
 if metadata is None:
     st.warning("No evaluation metadata found. Please configure the evaluation job below.")
@@ -111,8 +110,8 @@ with st.expander("Advanced configuration"):
     ## Select task
     selected_task = st.selectbox(
         "Select a task",
-        supported_tasks,
-        index=supported_tasks.index(metadata[0]["task_id"]) if metadata is not None else 0,
+        SUPPORTED_TASKS,
+        index=SUPPORTED_TASKS.index(metadata[0]["task_id"]) if metadata is not None else 0,
     )
     ### Select config
     configs = get_dataset_config_names(selected_dataset)
@@ -136,7 +135,7 @@ with st.expander("Advanced configuration"):
     ## Select columns
     rows_resp = http_get(
         path="/rows",
-        domain="https://datasets-preview.huggingface.tech",
+        domain=DATASETS_PREVIEW_API,
         params={"dataset": selected_dataset, "config": selected_config, "split": selected_split},
     ).json()
     col_names = list(pd.json_normalize(rows_resp["rows"][0]["row"]).columns)
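Note: http_get is a helper defined elsewhere in app.py. As a rough sketch of the request this hunk issues against the datasets preview backend (the constant's value is inferred from the removed literal, and the plain requests call is an assumption, not the app's actual helper):

    import requests

    DATASETS_PREVIEW_API = "https://datasets-preview.huggingface.tech"  # inferred from the removed literal

    def fetch_first_rows(dataset, config, split):
        # Roughly equivalent to http_get(path="/rows", domain=DATASETS_PREVIEW_API, params=...).json()
        resp = requests.get(
            f"{DATASETS_PREVIEW_API}/rows",
            params={"dataset": dataset, "config": config, "split": split},
        )
        resp.raise_for_status()
        return resp.json()  # expected shape: {"rows": [{"row": {...}}, ...]}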
@@ -236,6 +235,9 @@ with st.expander("Advanced configuration"):
             col_mapping[target_col] = "target"
 
     elif selected_task == "extractive_question_answering":
+        col_mapping = metadata[0]["col_mapping"]
+        # Hub YAML parser converts periods to hyphens, so we remap them here
+        col_mapping = {k.replace("-", "."): v.replace("-", ".") for k, v in col_mapping.items()}
         with col1:
             st.markdown("`context` column")
             st.text("")
@@ -257,26 +259,22 @@ with st.expander("Advanced configuration"):
             context_col = st.selectbox(
                 "This column should contain the question's context",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "context")) if metadata is not None else 0,
+                index=col_names.index(get_key(col_mapping, "context")) if metadata is not None else 0,
             )
             question_col = st.selectbox(
                 "This column should contain the question to be answered, given the context",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "question")) if metadata is not None else 0,
+                index=col_names.index(get_key(col_mapping, "question")) if metadata is not None else 0,
             )
             answers_text_col = st.selectbox(
                 "This column should contain example answers to the question, extracted from the context",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "answers.text"))
-                if metadata is not None
-                else 0,
+                index=col_names.index(get_key(col_mapping, "answers.text")) if metadata is not None else 0,
             )
             answers_start_col = st.selectbox(
                 "This column should contain the indices in the context of the first character of each answers.text",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "answers.answer_start"))
-                if metadata is not None
-                else 0,
+                index=col_names.index(get_key(col_mapping, "answers.answer_start")) if metadata is not None else 0,
             )
             col_mapping[context_col] = "context"
             col_mapping[question_col] = "question"
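Note: the Hub's YAML metadata parser cannot keep periods in col_mapping entries (answers.text is stored as answers-text), which is what the added remapping undoes before get_key looks up columns. get_key is defined elsewhere in app.py; a small sketch with a hypothetical dataset mapping and an assumed reverse-lookup implementation:

    # Hypothetical col_mapping as it arrives from the Hub (periods flattened to hyphens).
    raw_mapping = {
        "passage": "context",
        "query": "question",
        "answers-text": "answers-text",
        "answers-answer_start": "answers-answer_start",
    }

    # The added lines restore the periods on both keys and values.
    col_mapping = {k.replace("-", "."): v.replace("-", ".") for k, v in raw_mapping.items()}

    def get_key(mapping, value):
        # Assumed helper: return the dataset column mapped to a canonical column name.
        return next(k for k, v in mapping.items() if v == value)

    print(get_key(col_mapping, "context"))       # passage
    print(get_key(col_mapping, "answers.text"))  # answers.text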
@@ -287,19 +285,19 @@ with st.form(key="form"):
 
     compatible_models = get_compatible_models(selected_task, selected_dataset)
     st.markdown("The following metrics will be computed")
-    html_string = " ".join(["<div style=\"padding-right:5px;padding-left:5px;padding-top:5px;padding-bottom:5px;float:left\"><div style=\"background-color:#D3D3D3;border-radius:5px;display:inline-block;padding-right:5px;padding-left:5px;color:white\">" + metric + "</div></div>" for metric in TASK_TO_DEFAULT_METRICS[selected_task]])
+    html_string = " ".join([
+        "<div style=\"padding-right:5px;padding-left:5px;padding-top:5px;padding-bottom:5px;float:left\">"
+        + "<div style=\"background-color:#D3D3D3;border-radius:5px;display:inline-block;padding-right:5px;padding-left:5px;color:white\">"
+        + metric + "</div></div>" for metric in TASK_TO_DEFAULT_METRICS[selected_task]
+    ])
     st.markdown(html_string, unsafe_allow_html=True)
     selected_metrics = st.multiselect(
         "(Optional) Select additional metrics",
-        list(set(supported_metrics.keys()) - set(TASK_TO_DEFAULT_METRICS[selected_task])),
+        list(set(supported_metrics) - set(TASK_TO_DEFAULT_METRICS[selected_task])),
     )
-    for metric_name in selected_metrics:
-        argument_string = ", ".join(["-".join(key, value) for key, value in supported_metrics[metric].items()])
-        st.info(f"Note! The arguments for {metric_name} are: {argument_string}")
+    st.info("Note: user-selected metrics will be run with their default arguments from [here](https://github.com/huggingface/datasets/tree/master/metrics)")
     selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
-    print("Selected models:", selected_models)
     submit_button = st.form_submit_button("Make submission")
-
     if submit_button:
         project_id = str(uuid.uuid4())[:3]
         payload = {
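Note: the reflowed comprehension only changes formatting; each default metric is still rendered as a grey, rounded badge. A small equivalent sketch (the helper name and the sample metric list are made up for illustration):

    def metric_badge(metric):
        # One floating badge per metric, matching the inline styles in the hunk.
        return (
            '<div style="padding-right:5px;padding-left:5px;padding-top:5px;padding-bottom:5px;float:left">'
            '<div style="background-color:#D3D3D3;border-radius:5px;display:inline-block;'
            'padding-right:5px;padding-left:5px;color:white">' + metric + "</div></div>"
        )

    html_string = " ".join(metric_badge(m) for m in ["rouge1", "rouge2"])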
@@ -355,7 +353,7 @@ with st.form(key="form"):
         f"""
         Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍵 while you wait:
 
-        * 📊 Click [here](https://huggingface.co/spaces/huggingface/leaderboards) to view the results from your submission
+        * 📊 Click [here](https://huggingface.co/spaces/autoevaluate/leaderboards) to view the results from your submission
         """
     )
     else:
 