Spaces:

autoevaluate
/

model-evaluator

Runtime error

App Files Community

lewtun HF Staff committed on Aug 26, 2022

Commit

aacdddf

1 Parent(s): 118ffe4

Handle multiple configs

Browse files

Files changed (2) hide show

app.py +46 -32
utils.py +11 -0

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ from utils import (
     create_autotrain_project_name,
     format_col_mapping,
     get_compatible_models,
     get_dataset_card_url,
     get_key,
     get_metadata,
@@ -123,16 +124,6 @@ SUPPORTED_METRICS = [
 ]
-def get_config_metadata(config, metadata=None):
-    if metadata is None:
-        return None
-    config_metadata = [m for m in metadata if m["config"] == config]
-    if len(config_metadata) == 1:
-        return config_metadata[0]
-    else:
-        return None
 #######
 # APP #
 #######
@@ -190,10 +181,6 @@ if metadata is None:
 with st.expander("Advanced configuration"):
     # Select task
-    # Hack to filter for unsupported tasks
-    # TODO(lewtun): remove this once we have SQuAD metrics support
-    if metadata is not None and metadata[0]["task_id"] in UNSUPPORTED_TASKS:
-        metadata = None
     selected_task = st.selectbox(
         "Select a task",
         SUPPORTED_TASKS,
@@ -211,6 +198,9 @@ with st.expander("Advanced configuration"):
             See the [docs](https://huggingface.co/docs/datasets/master/en/load_hub#configurations) for more details.
             """,
     )
     # Select splits
     splits_resp = http_get(
@@ -225,8 +215,8 @@ with st.expander("Advanced configuration"):
             if split["config"] == selected_config:
                 split_names.append(split["split"])
-        if metadata is not None:
-            eval_split = metadata[0]["splits"].get("eval_split", None)
         else:
             eval_split = None
         selected_split = st.selectbox(
@@ -270,12 +260,16 @@ with st.expander("Advanced configuration"):
             text_col = st.selectbox(
                 "This column should contain the text to be classified",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "text")) if metadata is not None else 0,
             )
             target_col = st.selectbox(
                 "This column should contain the labels associated with the text",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "target")) if metadata is not None else 0,
             )
             col_mapping[text_col] = "text"
             col_mapping[target_col] = "target"
@@ -289,11 +283,13 @@ with st.expander("Advanced configuration"):
             st.text("")
             st.text("")
             st.text("")
             st.markdown("`text2` column")
             st.text("")
             st.text("")
             st.text("")
             st.text("")
             st.markdown("`target` column")
         with col2:
             text1_col = st.selectbox(
@@ -333,12 +329,16 @@ with st.expander("Advanced configuration"):
             tokens_col = st.selectbox(
                 "This column should contain the array of tokens to be classified",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "tokens")) if metadata is not None else 0,
             )
             tags_col = st.selectbox(
                 "This column should contain the labels associated with each part of the text",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "tags")) if metadata is not None else 0,
             )
             col_mapping[tokens_col] = "tokens"
             col_mapping[tags_col] = "tags"
@@ -355,12 +355,16 @@ with st.expander("Advanced configuration"):
             text_col = st.selectbox(
                 "This column should contain the text to be translated",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "source")) if metadata is not None else 0,
             )
             target_col = st.selectbox(
                 "This column should contain the target translation",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "target")) if metadata is not None else 0,
             )
             col_mapping[text_col] = "source"
             col_mapping[target_col] = "target"
@@ -377,19 +381,23 @@ with st.expander("Advanced configuration"):
             text_col = st.selectbox(
                 "This column should contain the text to be summarized",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "text")) if metadata is not None else 0,
             )
             target_col = st.selectbox(
                 "This column should contain the target summary",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "target")) if metadata is not None else 0,
             )
             col_mapping[text_col] = "text"
             col_mapping[target_col] = "target"
     elif selected_task == "extractive_question_answering":
-        if metadata is not None:
-            col_mapping = metadata[0]["col_mapping"]
             # Hub YAML parser converts periods to hyphens, so we remap them here
             col_mapping = format_col_mapping(col_mapping)
         with col1:
@@ -413,22 +421,24 @@ with st.expander("Advanced configuration"):
             context_col = st.selectbox(
                 "This column should contain the question's context",
                 col_names,
-                index=col_names.index(get_key(col_mapping, "context")) if metadata is not None else 0,
             )
             question_col = st.selectbox(
                 "This column should contain the question to be answered, given the context",
                 col_names,
-                index=col_names.index(get_key(col_mapping, "question")) if metadata is not None else 0,
             )
             answers_text_col = st.selectbox(
                 "This column should contain example answers to the question, extracted from the context",
                 col_names,
-                index=col_names.index(get_key(col_mapping, "answers.text")) if metadata is not None else 0,
             )
             answers_start_col = st.selectbox(
                 "This column should contain the indices in the context of the first character of each `answers.text`",
                 col_names,
-                index=col_names.index(get_key(col_mapping, "answers.answer_start")) if metadata is not None else 0,
             )
             col_mapping[context_col] = "context"
             col_mapping[question_col] = "question"
@@ -446,12 +456,16 @@ with st.expander("Advanced configuration"):
             image_col = st.selectbox(
                 "This column should contain the images to be classified",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "image")) if metadata is not None else 0,
             )
             target_col = st.selectbox(
                 "This column should contain the labels associated with the images",
                 col_names,
-                index=col_names.index(get_key(metadata[0]["col_mapping"], "target")) if metadata is not None else 0,
             )
             col_mapping[image_col] = "image"
             col_mapping[target_col] = "target"

     create_autotrain_project_name,
     format_col_mapping,
     get_compatible_models,
+    get_config_metadata,
     get_dataset_card_url,
     get_key,
     get_metadata,
 ]
 #######
 # APP #
 #######
 with st.expander("Advanced configuration"):
     # Select task
     selected_task = st.selectbox(
         "Select a task",
         SUPPORTED_TASKS,
             See the [docs](https://huggingface.co/docs/datasets/master/en/load_hub#configurations) for more details.
             """,
     )
+    # Get metadata for config
+    config_metadata = get_config_metadata(selected_config, metadata)
+    print(f"INFO -- Config metadata: {config_metadata}")
     # Select splits
     splits_resp = http_get(
             if split["config"] == selected_config:
                 split_names.append(split["split"])
+        if config_metadata is not None:
+            eval_split = config_metadata["splits"].get("eval_split", None)
         else:
             eval_split = None
         selected_split = st.selectbox(
             text_col = st.selectbox(
                 "This column should contain the text to be classified",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "text"))
+                if config_metadata is not None
+                else 0,
             )
             target_col = st.selectbox(
                 "This column should contain the labels associated with the text",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
+                if config_metadata is not None
+                else 0,
             )
             col_mapping[text_col] = "text"
             col_mapping[target_col] = "target"
             st.text("")
             st.text("")
             st.text("")
+            st.text("")
             st.markdown("`text2` column")
             st.text("")
             st.text("")
             st.text("")
             st.text("")
+            st.text("")
             st.markdown("`target` column")
         with col2:
             text1_col = st.selectbox(
             tokens_col = st.selectbox(
                 "This column should contain the array of tokens to be classified",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "tokens"))
+                if config_metadata is not None
+                else 0,
             )
             tags_col = st.selectbox(
                 "This column should contain the labels associated with each part of the text",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "tags"))
+                if config_metadata is not None
+                else 0,
             )
             col_mapping[tokens_col] = "tokens"
             col_mapping[tags_col] = "tags"
             text_col = st.selectbox(
                 "This column should contain the text to be translated",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "source"))
+                if config_metadata is not None
+                else 0,
             )
             target_col = st.selectbox(
                 "This column should contain the target translation",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
+                if config_metadata is not None
+                else 0,
             )
             col_mapping[text_col] = "source"
             col_mapping[target_col] = "target"
             text_col = st.selectbox(
                 "This column should contain the text to be summarized",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "text"))
+                if config_metadata is not None
+                else 0,
             )
             target_col = st.selectbox(
                 "This column should contain the target summary",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
+                if config_metadata is not None
+                else 0,
             )
             col_mapping[text_col] = "text"
             col_mapping[target_col] = "target"
     elif selected_task == "extractive_question_answering":
+        if config_metadata is not None:
+            col_mapping = config_metadata["col_mapping"]
             # Hub YAML parser converts periods to hyphens, so we remap them here
             col_mapping = format_col_mapping(col_mapping)
         with col1:
             context_col = st.selectbox(
                 "This column should contain the question's context",
                 col_names,
+                index=col_names.index(get_key(col_mapping, "context")) if config_metadata is not None else 0,
             )
             question_col = st.selectbox(
                 "This column should contain the question to be answered, given the context",
                 col_names,
+                index=col_names.index(get_key(col_mapping, "question")) if config_metadata is not None else 0,
             )
             answers_text_col = st.selectbox(
                 "This column should contain example answers to the question, extracted from the context",
                 col_names,
+                index=col_names.index(get_key(col_mapping, "answers.text")) if config_metadata is not None else 0,
             )
             answers_start_col = st.selectbox(
                 "This column should contain the indices in the context of the first character of each `answers.text`",
                 col_names,
+                index=col_names.index(get_key(col_mapping, "answers.answer_start"))
+                if config_metadata is not None
+                else 0,
             )
             col_mapping[context_col] = "context"
             col_mapping[question_col] = "question"
             image_col = st.selectbox(
                 "This column should contain the images to be classified",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "image"))
+                if config_metadata is not None
+                else 0,
             )
             target_col = st.selectbox(
                 "This column should contain the labels associated with the images",
                 col_names,
+                index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
+                if config_metadata is not None
+                else 0,
             )
             col_mapping[image_col] = "image"
             col_mapping[target_col] = "target"

utils.py CHANGED Viewed

@@ -198,3 +198,14 @@ def create_autotrain_project_name(dataset_id: str) -> str:
     # Project names need to be unique, so we append a random string to guarantee this
     project_id = str(uuid.uuid4())[:8]
     return f"eval-project-{dataset_id_formatted}-{project_id}"

     # Project names need to be unique, so we append a random string to guarantee this
     project_id = str(uuid.uuid4())[:8]
     return f"eval-project-{dataset_id_formatted}-{project_id}"
+def get_config_metadata(config: str, metadata: List[Dict] = None) -> Union[Dict, None]:
+    """Gets the dataset card metadata for the given config."""
+    if metadata is None:
+        return None
+    config_metadata = [m for m in metadata if m["config"] == config]
+    if len(config_metadata) == 1:
+        return config_metadata[0]
+    else:
+        return None