Spaces:

AleksBlacky
/

Arxiv_paper_classifier

Runtime error

App Files Files Community

AleksBlacky committed on Mar 29, 2022

Commit

a432184

•

1 Parent(s): 66700ad

update - more checking user input

Browse files

Files changed (1) hide show

app.py +24 -12

app.py CHANGED Viewed

@@ -8,25 +8,27 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 st.markdown("# Hello, friend!")
 st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
-# st.write("Loading tokenizer and dict")
-model_name_global = "allenai/scibert_scivocab_uncased"
-tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
-with open('./models/scibert/decode_dict.pkl', 'rb') as f:
-    decode_dict = pickle.load(f)
 with st.form(key="my_form"):
     st.markdown("### 🎈 Do you want a little magic?  ")
     st.markdown(" Write your article title and abstract to textboxes bellow and I'll gues topic of your paper!  ")
-    ce, c2, c3 = st.columns([0.07, 6, 0.07])
     with c2:
         doc_title = st.text_area(
-            "Paste your abstract title below (max 100 words)",
             height=210,
         )
         doc_abstract = st.text_area(
-            "Paste your abstract text below (max 100500 words)",
             height=410,
         )
@@ -35,6 +37,7 @@ with st.form(key="my_form"):
         len_title = len(re.findall(r"\w+", doc_title))
         len_abstract = len(re.findall(r"\w+", doc_abstract))
         if len_title > MAX_WORDS_TITLE:
             st.warning(
                 "⚠️ Your title contains "
@@ -50,7 +53,7 @@ with st.form(key="my_form"):
                 "⚠️ Your abstract contains "
                 + str(len_abstract)
                 + " words."
-                + " Only the first 50 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
             )
             doc_abstract = doc_abstract[:MAX_WORDS_ABSTRACT]
@@ -60,6 +63,14 @@ with st.form(key="my_form"):
 if not submit_button:
     st.stop()
 #  allow_output_mutation=True
 @st.cache(suppress_st_warning=True)
@@ -69,8 +80,6 @@ def load_model():
 def make_predict(tokens, decode_dict):
-    # tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
-    # tokens = tokenizer_(title + abstract, return_tensors="pt")
     model_ = load_model()
     outs = model_(tokens.input_ids)
@@ -87,7 +96,10 @@ model_local = "models/scibert/"
 title = doc_title
 abstract = doc_abstract
-tokens = tokenizer_(title + abstract, return_tensors="pt")
 predicts = make_predict(tokens, decode_dict)

 st.markdown("# Hello, friend!")
 st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
+try:
+    model_name_global = "allenai/scibert_scivocab_uncased"
+    tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
+    with open('./models/scibert/decode_dict.pkl', 'rb') as f:
+        decode_dict = pickle.load(f)
+except ValueError:
+    st.error("Load tokenizer or decode answer dict goes wrong! Pls contact author alekseystepin13@gmail.com")
 with st.form(key="my_form"):
     st.markdown("### 🎈 Do you want a little magic?  ")
     st.markdown(" Write your article title and abstract to textboxes bellow and I'll gues topic of your paper!  ")
+    ce, c2, c3 = st.columns([0.07, 7, 0.07])
     with c2:
         doc_title = st.text_area(
+            "Paste your abstract title below (1 to 50 words)",
             height=210,
         )
         doc_abstract = st.text_area(
+            "Paste your abstract text below (1 to 500 words)",
             height=410,
         )
         len_title = len(re.findall(r"\w+", doc_title))
         len_abstract = len(re.findall(r"\w+", doc_abstract))
         if len_title > MAX_WORDS_TITLE:
             st.warning(
                 "⚠️ Your title contains "
                 "⚠️ Your abstract contains "
                 + str(len_abstract)
                 + " words."
+                + " Only the first 500 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
             )
             doc_abstract = doc_abstract[:MAX_WORDS_ABSTRACT]
 if not submit_button:
     st.stop()
+if len_title < 1:
+    st.error("Article without any words in title? Pls give me correct title!")
+    st.stop()
+if len_abstract < 1:
+    st.error("Article without any words in abstract? Pls give me correct abstract!")
+    st.stop()
 #  allow_output_mutation=True
 @st.cache(suppress_st_warning=True)
 def make_predict(tokens, decode_dict):
     model_ = load_model()
     outs = model_(tokens.input_ids)
 title = doc_title
 abstract = doc_abstract
+try:
+    tokens = tokenizer_(title + abstract, return_tensors="pt")
+except ValueError:
+    st.error("Word parsing into tokens went wrong! Is input valid? If yes, pls contact author alekseystepin13@gmail.com")
 predicts = make_predict(tokens, decode_dict)