AleksBlacky committed on
Commit
a432184
•
1 Parent(s): 66700ad

update - more checking user input

Browse files
Files changed (1) hide show
  1. app.py +24 -12
app.py CHANGED
@@ -8,25 +8,27 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
  st.markdown("# Hello, friend!")
9
  st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
10
 
11
- # st.write("Loading tokenizer and dict")
12
- model_name_global = "allenai/scibert_scivocab_uncased"
13
- tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
14
- with open('./models/scibert/decode_dict.pkl', 'rb') as f:
15
- decode_dict = pickle.load(f)
 
 
16
 
17
  with st.form(key="my_form"):
18
  st.markdown("### 🎈 Do you want a little magic? ")
19
  st.markdown(" Write your article title and abstract to textboxes bellow and I'll gues topic of your paper! ")
20
- ce, c2, c3 = st.columns([0.07, 6, 0.07])
21
 
22
  with c2:
23
  doc_title = st.text_area(
24
- "Paste your abstract title below (max 100 words)",
25
  height=210,
26
  )
27
 
28
  doc_abstract = st.text_area(
29
- "Paste your abstract text below (max 100500 words)",
30
  height=410,
31
  )
32
 
@@ -35,6 +37,7 @@ with st.form(key="my_form"):
35
 
36
  len_title = len(re.findall(r"\w+", doc_title))
37
  len_abstract = len(re.findall(r"\w+", doc_abstract))
 
38
  if len_title > MAX_WORDS_TITLE:
39
  st.warning(
40
  "⚠️ Your title contains "
@@ -50,7 +53,7 @@ with st.form(key="my_form"):
50
  "⚠️ Your abstract contains "
51
  + str(len_abstract)
52
  + " words."
53
- + " Only the first 50 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
54
  )
55
 
56
  doc_abstract = doc_abstract[:MAX_WORDS_ABSTRACT]
@@ -60,6 +63,14 @@ with st.form(key="my_form"):
60
  if not submit_button:
61
  st.stop()
62
 
 
 
 
 
 
 
 
 
63
 
64
  # allow_output_mutation=True
65
  @st.cache(suppress_st_warning=True)
@@ -69,8 +80,6 @@ def load_model():
69
 
70
 
71
  def make_predict(tokens, decode_dict):
72
- # tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
73
- # tokens = tokenizer_(title + abstract, return_tensors="pt")
74
 
75
  model_ = load_model()
76
  outs = model_(tokens.input_ids)
@@ -87,7 +96,10 @@ model_local = "models/scibert/"
87
 
88
  title = doc_title
89
  abstract = doc_abstract
90
- tokens = tokenizer_(title + abstract, return_tensors="pt")
 
 
 
91
 
92
  predicts = make_predict(tokens, decode_dict)
93
 
8
  st.markdown("# Hello, friend!")
9
  st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
10
 
11
+ try:
12
+ model_name_global = "allenai/scibert_scivocab_uncased"
13
+ tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
14
+ with open('./models/scibert/decode_dict.pkl', 'rb') as f:
15
+ decode_dict = pickle.load(f)
16
+ except ValueError:
17
+ st.error("Load tokenizer or decode answer dict goes wrong! Pls contact author alekseystepin13@gmail.com")
18
 
19
  with st.form(key="my_form"):
20
  st.markdown("### 🎈 Do you want a little magic? ")
21
  st.markdown(" Write your article title and abstract to textboxes bellow and I'll gues topic of your paper! ")
22
+ ce, c2, c3 = st.columns([0.07, 7, 0.07])
23
 
24
  with c2:
25
  doc_title = st.text_area(
26
+ "Paste your abstract title below (1 to 50 words)",
27
  height=210,
28
  )
29
 
30
  doc_abstract = st.text_area(
31
+ "Paste your abstract text below (1 to 500 words)",
32
  height=410,
33
  )
34
 
37
 
38
  len_title = len(re.findall(r"\w+", doc_title))
39
  len_abstract = len(re.findall(r"\w+", doc_abstract))
40
+
41
  if len_title > MAX_WORDS_TITLE:
42
  st.warning(
43
  "⚠️ Your title contains "
53
  "⚠️ Your abstract contains "
54
  + str(len_abstract)
55
  + " words."
56
+ + " Only the first 500 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
57
  )
58
 
59
  doc_abstract = doc_abstract[:MAX_WORDS_ABSTRACT]
63
  if not submit_button:
64
  st.stop()
65
 
66
+ if len_title < 1:
67
+ st.error("Article without any words in title? Pls give me correct title!")
68
+ st.stop()
69
+
70
+ if len_abstract < 1:
71
+ st.error("Article without any words in abstract? Pls give me correct abstract!")
72
+ st.stop()
73
+
74
 
75
  # allow_output_mutation=True
76
  @st.cache(suppress_st_warning=True)
80
 
81
 
82
  def make_predict(tokens, decode_dict):
 
 
83
 
84
  model_ = load_model()
85
  outs = model_(tokens.input_ids)
96
 
97
  title = doc_title
98
  abstract = doc_abstract
99
+ try:
100
+ tokens = tokenizer_(title + abstract, return_tensors="pt")
101
+ except ValueError:
102
+ st.error("Word parsing into tokens went wrong! Is input valid? If yes, pls contact author alekseystepin13@gmail.com")
103
 
104
  predicts = make_predict(tokens, decode_dict)
105