Elron Bandel committed on
Commit
2df020e
1 Parent(s): 3411193

update code

Browse files
Files changed (2) hide show
  1. app.py +6 -51
  2. requirements.txt +1 -1
app.py CHANGED
@@ -12,31 +12,6 @@ st.set_page_config(
12
  initial_sidebar_state="expanded",
13
  )
14
 
15
- # st.markdown(
16
- # """
17
- # <style>
18
-
19
- # .sidebar .sidebar-content {
20
- # background-image: linear-gradient(#3377ff, #80aaff);
21
- # }
22
-
23
- # footer {
24
- # color:white;
25
- # visibility: hidden;
26
- # }
27
- # input {
28
- # direction: rtl;
29
- # }
30
- # .stTextInput .instructions {
31
- # color: grey;
32
- # font-size: 9px;}
33
-
34
- # </style>
35
- # <div style="color:white; font-size:13px; font-family:monospace;position: fixed; z-index: 1; bottom: 0; right:0; background-color: #f63766;margin:3px;padding:8px;border-radius: 5px;"><a href="https://huggingface.co/onlplab/alephbert-base" target="_blank" style="text-decoration: none;color: white;">Use aleph-bert in your project </a></div>
36
- # """,
37
- # unsafe_allow_html=True,
38
- # )
39
-
40
  models = {
41
  "AlephBERT-base": {
42
  "name_or_path":"onlplab/alephbert-base",
@@ -116,16 +91,8 @@ if mode == 'Models':
116
 
117
  st.markdown(''.join([f'<span style="color:white; font-size:13px; font-family:monospace; background-color: #f63766;margin:3px;padding:8px;border-radius: 5px;">{tag}</span>' for tag in model_tags]),unsafe_allow_html=True)
118
  st.markdown('___')
119
- ####
120
- #prepare the model
121
- ####
122
-
123
  unmasker, tokenize = load_model(model)
124
-
125
-
126
- ####
127
- # get inputs
128
- ####
129
 
130
  input_text = st.text_input('Insert text you want to mask', '')
131
  if input_text:
@@ -136,9 +103,9 @@ if mode == 'Models':
136
 
137
  if masking_level == 'Tokens':
138
  tokens = str(input_text).split()
139
- masked_token = st.selectbox('Select token to mask:', [''] + tokens)
140
- if masked_token != '':
141
- input_masked = ' '.join(token if token != masked_token else '[MASK]' for token in tokens)
142
  display_input = input_masked
143
  if masking_level == 'SubWords':
144
  tokens = subwords
@@ -157,25 +124,13 @@ if mode == 'Models':
157
  unsafe_allow_html=True,
158
  )
159
  st.markdown('#### Outputs:')
160
- res = unmasker(input_masked, tokenized=masking_level == 'SubWords', top_k=n_res)
 
161
  if res:
162
  res = [{'Prediction':r['token_str'], 'Completed Sentence':r['sequence'].replace('[SEP]', '').replace('[CLS]', ''), 'Score':r['score']} for r in res]
163
  res_table = pd.DataFrame(res)
164
  st.table(res_table)
165
 
166
-
167
-
168
- # cols = st.beta_columns(len(tokens))
169
- # genre = st.radio(
170
- # 'Select token to mask:', tokens)
171
- # for col, token in zip(cols, reversed(tokens)):
172
- # col.text(token)
173
-
174
- # st.text(tokens)
175
- # res = unmasker(input_text)
176
- # res_table = pd.DataFrame(res)
177
- # st.table(res_table)
178
- # st.text(res)
179
 
180
 
181
 
 
12
  initial_sidebar_state="expanded",
13
  )
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  models = {
16
  "AlephBERT-base": {
17
  "name_or_path":"onlplab/alephbert-base",
 
91
 
92
  st.markdown(''.join([f'<span style="color:white; font-size:13px; font-family:monospace; background-color: #f63766;margin:3px;padding:8px;border-radius: 5px;">{tag}</span>' for tag in model_tags]),unsafe_allow_html=True)
93
  st.markdown('___')
94
+
 
 
 
95
  unmasker, tokenize = load_model(model)
 
 
 
 
 
96
 
97
  input_text = st.text_input('Insert text you want to mask', '')
98
  if input_text:
 
103
 
104
  if masking_level == 'Tokens':
105
  tokens = str(input_text).split()
106
+ mask_idx = st.selectbox('Select token to mask:', [None] + list(range(len(tokens))), format_func=lambda i: tokens[i] if i else '')
107
+ if mask_idx is not None:
108
+ input_masked = ' '.join(token if i != mask_idx else '[MASK]' for i, token in enumerate(tokens))
109
  display_input = input_masked
110
  if masking_level == 'SubWords':
111
  tokens = subwords
 
124
  unsafe_allow_html=True,
125
  )
126
  st.markdown('#### Outputs:')
127
+ with st.spinner('Running model...'):
128
+ res = unmasker(input_masked, tokenized=masking_level == 'SubWords', top_k=n_res)
129
  if res:
130
  res = [{'Prediction':r['token_str'], 'Completed Sentence':r['sequence'].replace('[SEP]', '').replace('[CLS]', ''), 'Score':r['score']} for r in res]
131
  res_table = pd.DataFrame(res)
132
  st.table(res_table)
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
 
136
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  torch
2
  sentencepiece
3
- transformers==4.4.2
4
  tokenizers
5
  pandas
 
1
  torch
2
  sentencepiece
3
+ transformers==4.6.1
4
  tokenizers
5
  pandas