lewtun HF staff committed on
Commit
294f139
1 Parent(s): f574f70

Add NER and filter for Transformers models

Browse files
Files changed (2) hide show
  1. app.py +35 -5
  2. utils.py +1 -1
app.py CHANGED
@@ -14,12 +14,24 @@ AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
14
  AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
15
 
16
 
17
- TASK_TO_ID = {"binary_classification":1, "multi_class_classification": 2, "multi_label_classification": 3, "entity_extraction": 4, "extractive_question_answering":5, "translation":6, "summarization":8, "single_column_regression":10}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  with st.form(key="form"):
20
 
21
- dataset_name = st.selectbox("Select a dataset to evaluate on", ["lewtun/autoevaluate__emotion"])
22
-
23
  # TODO: remove this step once we select real datasets
24
  # Strip out original dataset name
25
  original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]
@@ -27,13 +39,31 @@ with st.form(key="form"):
27
  # In general this will be a list of multiple configs => need to generalise logic here
28
  metadata = get_metadata(dataset_name)
29
 
30
- dataset_config = st.selectbox("Select the subset to evaluate on", [metadata[0]["config"]])
31
 
32
  splits = metadata[0]["splits"]
33
  split_names = list(splits.values())
34
  eval_split = splits.get("eval_split", split_names[0])
35
 
36
- selected_split = st.selectbox("Select the split to evaluate on", split_names, index=split_names.index(eval_split))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  compatible_models = get_compatible_models(metadata[0]["task"], original_dataset_name)
39
 
14
  AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
15
 
16
 
17
+ TASK_TO_ID = {
18
+ "binary_classification": 1,
19
+ "multi_class_classification": 2,
20
+ "multi_label_classification": 3,
21
+ "entity_extraction": 4,
22
+ "extractive_question_answering": 5,
23
+ "translation": 6,
24
+ "summarization": 8,
25
+ "single_column_regression": 10,
26
+ }
27
+
28
+ # TODO: remove this hardcoded logic and accept any dataset on the Hub
29
+ DATASETS_TO_EVALUATE = ["emotion", "conll2003"]
30
+
31
+ dataset_name = st.selectbox("Select a dataset", [f"lewtun/autoevaluate__{dset}" for dset in DATASETS_TO_EVALUATE])
32
 
33
  with st.form(key="form"):
34
 
 
 
35
  # TODO: remove this step once we select real datasets
36
  # Strip out original dataset name
37
  original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]
39
  # In general this will be a list of multiple configs => need to generalise logic here
40
  metadata = get_metadata(dataset_name)
41
 
42
+ dataset_config = st.selectbox("Select a config", [metadata[0]["config"]])
43
 
44
  splits = metadata[0]["splits"]
45
  split_names = list(splits.values())
46
  eval_split = splits.get("eval_split", split_names[0])
47
 
48
+ selected_split = st.selectbox("Select a split", split_names, index=split_names.index(eval_split))
49
+
50
+ col_mapping = metadata[0]["col_mapping"]
51
+ col_names = list(col_mapping.values())
52
+
53
+ # TODO: figure out how to get all dataset column names (i.e. features) without downloading the dataset itself
54
+ st.markdown("**Map your data columns**")
55
+ col1, col2 = st.columns(2)
56
+
57
+ with col1:
58
+ st.markdown("`text` column")
59
+ st.text("")
60
+ st.text("")
61
+ st.text("")
62
+ st.text("")
63
+ st.markdown("`target` column")
64
+ with col2:
65
+ st.selectbox("This column should contain the text you want to classify", col_names, index=0)
66
+ st.selectbox("This column should contain the labels you want to assign to the text", col_names, index=1)
67
 
68
  compatible_models = get_compatible_models(metadata[0]["task"], original_dataset_name)
69
 
utils.py CHANGED
@@ -32,6 +32,6 @@ def get_metadata(dataset_name):
32
 
33
 
34
  def get_compatible_models(task, dataset_name):
35
- filt = ModelFilter(task=task, trained_dataset=dataset_name)
36
  compatible_models = api.list_models(filter=filt)
37
  return [model.modelId for model in compatible_models]
32
 
33
 
34
  def get_compatible_models(task, dataset_name):
35
+ filt = ModelFilter(task=task, trained_dataset=dataset_name, library="transformers")
36
  compatible_models = api.list_models(filter=filt)
37
  return [model.modelId for model in compatible_models]