Spaces:
Runtime error
Runtime error
Add NER and filter for Transformers models
Browse files
app.py
CHANGED
|
@@ -14,12 +14,24 @@ AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
|
|
| 14 |
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
|
| 15 |
|
| 16 |
|
| 17 |
-
TASK_TO_ID = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
with st.form(key="form"):
|
| 20 |
|
| 21 |
-
dataset_name = st.selectbox("Select a dataset to evaluate on", ["lewtun/autoevaluate__emotion"])
|
| 22 |
-
|
| 23 |
# TODO: remove this step once we select real datasets
|
| 24 |
# Strip out original dataset name
|
| 25 |
original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]
|
|
@@ -27,13 +39,31 @@ with st.form(key="form"):
|
|
| 27 |
# In general this will be a list of multiple configs => need to generalise logic here
|
| 28 |
metadata = get_metadata(dataset_name)
|
| 29 |
|
| 30 |
-
dataset_config = st.selectbox("Select
|
| 31 |
|
| 32 |
splits = metadata[0]["splits"]
|
| 33 |
split_names = list(splits.values())
|
| 34 |
eval_split = splits.get("eval_split", split_names[0])
|
| 35 |
|
| 36 |
-
selected_split = st.selectbox("Select
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
compatible_models = get_compatible_models(metadata[0]["task"], original_dataset_name)
|
| 39 |
|
|
|
|
| 14 |
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
|
| 15 |
|
| 16 |
|
| 17 |
+
TASK_TO_ID = {
|
| 18 |
+
"binary_classification": 1,
|
| 19 |
+
"multi_class_classification": 2,
|
| 20 |
+
"multi_label_classification": 3,
|
| 21 |
+
"entity_extraction": 4,
|
| 22 |
+
"extractive_question_answering": 5,
|
| 23 |
+
"translation": 6,
|
| 24 |
+
"summarization": 8,
|
| 25 |
+
"single_column_regression": 10,
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
# TODO: remove this hardcoded logic and accept any dataset on the Hub
|
| 29 |
+
DATASETS_TO_EVALUATE = ["emotion", "conll2003"]
|
| 30 |
+
|
| 31 |
+
dataset_name = st.selectbox("Select a dataset", [f"lewtun/autoevaluate__{dset}" for dset in DATASETS_TO_EVALUATE])
|
| 32 |
|
| 33 |
with st.form(key="form"):
|
| 34 |
|
|
|
|
|
|
|
| 35 |
# TODO: remove this step once we select real datasets
|
| 36 |
# Strip out original dataset name
|
| 37 |
original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]
|
|
|
|
| 39 |
# In general this will be a list of multiple configs => need to generalise logic here
|
| 40 |
metadata = get_metadata(dataset_name)
|
| 41 |
|
| 42 |
+
dataset_config = st.selectbox("Select a config", [metadata[0]["config"]])
|
| 43 |
|
| 44 |
splits = metadata[0]["splits"]
|
| 45 |
split_names = list(splits.values())
|
| 46 |
eval_split = splits.get("eval_split", split_names[0])
|
| 47 |
|
| 48 |
+
selected_split = st.selectbox("Select a split", split_names, index=split_names.index(eval_split))
|
| 49 |
+
|
| 50 |
+
col_mapping = metadata[0]["col_mapping"]
|
| 51 |
+
col_names = list(col_mapping.values())
|
| 52 |
+
|
| 53 |
+
# TODO: figure out how to get all dataset column names (i.e. features) without downloading the dataset itself
|
| 54 |
+
st.markdown("**Map your data columns**")
|
| 55 |
+
col1, col2 = st.columns(2)
|
| 56 |
+
|
| 57 |
+
with col1:
|
| 58 |
+
st.markdown("`text` column")
|
| 59 |
+
st.text("")
|
| 60 |
+
st.text("")
|
| 61 |
+
st.text("")
|
| 62 |
+
st.text("")
|
| 63 |
+
st.markdown("`target` column")
|
| 64 |
+
with col2:
|
| 65 |
+
st.selectbox("This column should contain the text you want to classify", col_names, index=0)
|
| 66 |
+
st.selectbox("This column should contain the labels you want to assign to the text", col_names, index=1)
|
| 67 |
|
| 68 |
compatible_models = get_compatible_models(metadata[0]["task"], original_dataset_name)
|
| 69 |
|
utils.py
CHANGED
|
@@ -32,6 +32,6 @@ def get_metadata(dataset_name):
|
|
| 32 |
|
| 33 |
|
| 34 |
def get_compatible_models(task, dataset_name):
|
| 35 |
-
filt = ModelFilter(task=task, trained_dataset=dataset_name)
|
| 36 |
compatible_models = api.list_models(filter=filt)
|
| 37 |
return [model.modelId for model in compatible_models]
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
def get_compatible_models(task, dataset_name):
|
| 35 |
+
filt = ModelFilter(task=task, trained_dataset=dataset_name, library="transformers")
|
| 36 |
compatible_models = api.list_models(filter=filt)
|
| 37 |
return [model.modelId for model in compatible_models]
|