Spaces:

mlgeis
/

arxiv-subject-classifier-demo

Runtime error

Michael-Geis committed on Aug 17, 2023

Commit

7af8946

•

1 Parent(s): 3050c48

added slider bar for user-controlled confidence on tags

Files changed (3) hide show

app.py CHANGED Viewed

@@ -46,16 +46,16 @@ def parse_title(input_title):
     return (title, subject_tags)
-def outputs_from_id(input_id):
     title, true_tags = parse_id(input_id)
-    predicted_tags = predict_from_text(title)
     return title, predicted_tags, true_tags
-def outputs_from_title(input_title):
     title, true_tags = parse_title(input_title)
-    predicted_tags = predict_from_text(title)
     return title, predicted_tags, true_tags
@@ -72,6 +72,8 @@ with gr.Blocks() as demo:
             id_true = gr.Textbox(label="True tags")
         id_button = gr.Button("Predict")
         gr.Examples(
             examples=[
                 "1706.03762",
@@ -102,11 +104,13 @@ with gr.Blocks() as demo:
         )
     id_button.click(
-        outputs_from_id, inputs=id_input, outputs=[id_title, id_predict, id_true]
     )
     title_button.click(
         outputs_from_title,
-        inputs=title_input,
         outputs=[title_title, title_predict, title_true],
     )

     return (title, subject_tags)
+def outputs_from_id(input_id, threshold_probability):
     title, true_tags = parse_id(input_id)
+    predicted_tags = predict_from_text(title, threshold_probability)
     return title, predicted_tags, true_tags
+def outputs_from_title(input_title, threshold_probability):
     title, true_tags = parse_title(input_title)
+    predicted_tags = predict_from_text(title, threshold_probability)
     return title, predicted_tags, true_tags
             id_true = gr.Textbox(label="True tags")
         id_button = gr.Button("Predict")
+        threshold_probability = gr.Slider(minimum=0, maximum=1)
         gr.Examples(
             examples=[
                 "1706.03762",
         )
     id_button.click(
+        outputs_from_id,
+        inputs=[id_input, threshold_probability],
+        outputs=[id_title, id_predict, id_true],
     )
     title_button.click(
         outputs_from_title,
+        inputs=[title_input, threshold_probability],
         outputs=[title_title, title_predict, title_true],
     )

model.py CHANGED Viewed

@@ -7,7 +7,7 @@ from preprocess import cleanse
 from postprocess import postprocess
-def predict_from_text(input_text):
     ## Load model and create pipeline
     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
     model = AutoModelForSequenceClassification.from_pretrained(
@@ -19,7 +19,7 @@ def predict_from_text(input_text):
     clean_title = cleanse(input_text)
     model_output = pipe(clean_title)
-    prediction = postprocess(model_output)
     if len(prediction) == 0:
         predict_output = "No matching tags."

 from postprocess import postprocess
+def predict_from_text(input_text, threshold_probability):
     ## Load model and create pipeline
     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
     model = AutoModelForSequenceClassification.from_pretrained(
     clean_title = cleanse(input_text)
     model_output = pipe(clean_title)
+    prediction = postprocess(model_output, threshold_probability=threshold_probability)
     if len(prediction) == 0:
         predict_output = "No matching tags."

postprocess.py CHANGED Viewed

@@ -1,12 +1,14 @@
 import json
-def postprocess(model_output):
     with open("./data/arxiv-label-dict.json", "r") as file:
         subject_dict = json.loads(file.read())
     predicted_tags = [
-        result["label"] for result in model_output[0] if result["score"] > 0.5
     ]
     return sorted([subject_dict[tag] for tag in predicted_tags])

 import json
+def postprocess(model_output, threshold_probability):
     with open("./data/arxiv-label-dict.json", "r") as file:
         subject_dict = json.loads(file.read())
     predicted_tags = [
+        result["label"]
+        for result in model_output[0]
+        if result["score"] > threshold_probability
     ]
     return sorted([subject_dict[tag] for tag in predicted_tags])