taka-yamakoshi committed on
Commit
4ae941b
1 Parent(s): 0aa4961
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -58,7 +58,7 @@ def clear_data():
58
  del st.session_state[key]
59
 
60
  def annotate_mask(sent_id,sent):
61
- st.write(f'Sentence {sent_id}')
62
  input_sent = tokenizer(sent).input_ids
63
  decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
64
  st.session_state[f'decoded_sent_{sent_id}'] = decoded_sent
@@ -77,7 +77,7 @@ def annotate_mask(sent_id,sent):
77
  mask_locs=st.session_state[f'mask_locs_{sent_id}'])
78
 
79
  def annotate_options(sent_id,sent):
80
- st.write(f'Sentence {sent_id}')
81
  input_sent = tokenizer(sent).input_ids
82
  decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
83
  char_nums = [len(word)+2 for word in decoded_sent]
@@ -177,7 +177,7 @@ if __name__=='__main__':
177
  st.session_state['page_status'] = 'type_in'
178
 
179
  if st.session_state['page_status']=='type_in':
180
- show_instruction('1. Type in the sentences and click "Tokenize"')
181
  sent_1 = st.text_input('Sentence 1',value="Paul tried to call George on the phone, but he wasn't successful.")
182
  sent_2 = st.text_input('Sentence 2',value="Paul tried to call George on the phone, but he wasn't available.")
183
  if st.button('Tokenize'):
@@ -190,7 +190,7 @@ if __name__=='__main__':
190
  sent_1 = st.session_state['sent_1']
191
  sent_2 = st.session_state['sent_2']
192
 
193
- show_instruction('2. Select sites to mask out and click "Confirm"')
194
  annotate_mask(1,sent_1)
195
  annotate_mask(2,sent_2)
196
  if st.button('Confirm',key='mask'):
@@ -212,12 +212,12 @@ if __name__=='__main__':
212
  with main_area.container():
213
  sent_1 = st.session_state['sent_1']
214
  sent_2 = st.session_state['sent_2']
215
- show_annotated_sentence(st.session_state['decoded_sent_1'],
216
- option_locs=st.session_state['option_locs_1'],
217
- mask_locs=st.session_state['mask_locs_1'])
218
- show_annotated_sentence(st.session_state['decoded_sent_2'],
219
- option_locs=st.session_state['option_locs_2'],
220
- mask_locs=st.session_state['mask_locs_2'])
221
 
222
  option_1_locs, option_2_locs = {}, {}
223
  pron_locs = {}
@@ -256,7 +256,10 @@ if __name__=='__main__':
256
  [probs_original[0,1][0],probs_original[1,1][0]]],
257
  columns=[tokenizer.decode(option_1_tokens),tokenizer.decode(option_2_tokens)],
258
  index=['Sentence 1','Sentence 2'])
259
- st.dataframe(df.style.highlight_max(axis=1))
 
 
 
260
 
261
  compare_1 = np.array(masked_ids_option_1['sent_1'])!=np.array(masked_ids_option_1['sent_2'])
262
  compare_2 = np.array(masked_ids_option_2['sent_1'])!=np.array(masked_ids_option_2['sent_2'])
@@ -268,7 +271,7 @@ if __name__=='__main__':
268
  assert np.all(np.array(option_1_locs['sent_1'])==np.array(option_1_locs['sent_2']))
269
  assert np.all(np.array(option_2_locs['sent_1'])==np.array(option_2_locs['sent_2']))
270
  token_id_list = pron_locs['sent_1'] + option_1_locs['sent_1'] + option_2_locs['sent_1'] + context_locs
271
- st.write(token_id_list)
272
 
273
  effect_array = []
274
  for token_id in token_id_list:
 
58
  del st.session_state[key]
59
 
60
  def annotate_mask(sent_id,sent):
61
+ show_instruction(f'Sentence {sent_id}',fontsize=12)
62
  input_sent = tokenizer(sent).input_ids
63
  decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
64
  st.session_state[f'decoded_sent_{sent_id}'] = decoded_sent
 
77
  mask_locs=st.session_state[f'mask_locs_{sent_id}'])
78
 
79
  def annotate_options(sent_id,sent):
80
+ show_instruction(f'Sentence {sent_id}',fontsize=12)
81
  input_sent = tokenizer(sent).input_ids
82
  decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
83
  char_nums = [len(word)+2 for word in decoded_sent]
 
177
  st.session_state['page_status'] = 'type_in'
178
 
179
  if st.session_state['page_status']=='type_in':
180
+ show_instruction('1. Type in the sentences and click "Tokenize"',fontsize=16)
181
  sent_1 = st.text_input('Sentence 1',value="Paul tried to call George on the phone, but he wasn't successful.")
182
  sent_2 = st.text_input('Sentence 2',value="Paul tried to call George on the phone, but he wasn't available.")
183
  if st.button('Tokenize'):
 
190
  sent_1 = st.session_state['sent_1']
191
  sent_2 = st.session_state['sent_2']
192
 
193
+ show_instruction('2. Select sites to mask out and click "Confirm"',fontsize=16)
194
  annotate_mask(1,sent_1)
195
  annotate_mask(2,sent_2)
196
  if st.button('Confirm',key='mask'):
 
212
  with main_area.container():
213
  sent_1 = st.session_state['sent_1']
214
  sent_2 = st.session_state['sent_2']
215
+ #show_annotated_sentence(st.session_state['decoded_sent_1'],
216
+ # option_locs=st.session_state['option_locs_1'],
217
+ # mask_locs=st.session_state['mask_locs_1'])
218
+ #show_annotated_sentence(st.session_state['decoded_sent_2'],
219
+ # option_locs=st.session_state['option_locs_2'],
220
+ # mask_locs=st.session_state['mask_locs_2'])
221
 
222
  option_1_locs, option_2_locs = {}, {}
223
  pron_locs = {}
 
256
  [probs_original[0,1][0],probs_original[1,1][0]]],
257
  columns=[tokenizer.decode(option_1_tokens),tokenizer.decode(option_2_tokens)],
258
  index=['Sentence 1','Sentence 2'])
259
+ cols = st.columns(3)
260
+ with cols[1]:
261
+ show_instruction('Prob. of predicting each option in each sentence',fontsize=12)
262
+ st.dataframe(df.style.highlight_max(axis=1),use_container_width=True)
263
 
264
  compare_1 = np.array(masked_ids_option_1['sent_1'])!=np.array(masked_ids_option_1['sent_2'])
265
  compare_2 = np.array(masked_ids_option_2['sent_1'])!=np.array(masked_ids_option_2['sent_2'])
 
271
  assert np.all(np.array(option_1_locs['sent_1'])==np.array(option_1_locs['sent_2']))
272
  assert np.all(np.array(option_2_locs['sent_1'])==np.array(option_2_locs['sent_2']))
273
  token_id_list = pron_locs['sent_1'] + option_1_locs['sent_1'] + option_2_locs['sent_1'] + context_locs
274
+ #st.write(token_id_list)
275
 
276
  effect_array = []
277
  for token_id in token_id_list: