taka-yamakoshi committed on
Commit 228552f
1 Parent(s): 9d4d0bb
Files changed (1)
  1. app.py +18 -13
app.py CHANGED
@@ -48,20 +48,25 @@ if __name__=='__main__':
     tokenizer = load_model('albert-xxlarge-v2')
     sent_cols = st.columns(2)
     num_tokens = {}
+    sents = {}
     for sent_id, sent_col in enumerate(sent_cols):
         with sent_col:
             sentence = st.text_input(f'Sentence {sent_id+1}')
-            input_sent = tokenizer(sentence)['input_ids']
-            decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
-            num_tokens[f'sent_{sent_id}'] = len(decoded_sent)
+            sents[f'sent_{sent_id}'] = sentence
+            if len(sentence)>0:
+                input_sent = tokenizer(sentence)['input_ids']
+                decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
+                num_tokens[f'sent_{sent_id}'] = len(decoded_sent)
 
-            char_nums = [len(word)+2 for word in decoded_sent]
-            word_cols = st.columns(char_nums)
-            for word_col,word in zip(word_cols,decoded_sent):
-                with word_col:
-                    st.write(word)
-            st.write(f'{len(decoded_sent)} tokens')
-    if num_tokens[f'sent_1']==num_tokens[f'sent_2']:
-        st.subheader('Matched!')
-    else:
-        st.subheader('Not Matched...')
+                char_nums = [len(word)+2 for word in decoded_sent]
+                word_cols = st.columns(char_nums)
+                for word_col,word in zip(word_cols,decoded_sent):
+                    with word_col:
+                        st.write(word)
+                st.write(f'{len(decoded_sent)} tokens')
+
+    if len(sents['sent_1'])>0 and len(sents['sent_2'])>0:
+        if num_tokens[f'sent_1']==num_tokens[f'sent_2']:
+            st.subheader('Matched!')
+        else:
+            st.subheader('Not Matched...')
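
For context, the hunk above guards the tokenization and the match check so they only run on non-empty input. Below is a minimal, self-contained sketch of the updated logic as a standalone Streamlit script, not the author's exact file: the `load_model` helper here is an assumption (a plain wrapper around `AlbertTokenizer.from_pretrained`; the real helper is defined earlier in app.py and is outside this diff), and the final check is keyed by the 0-based loop index ('sent_0'/'sent_1'), whereas the committed hunk reads 'sent_1'/'sent_2'.

```python
import streamlit as st
from transformers import AlbertTokenizer

def load_model(model_name):
    # Assumption: stands in for the load_model defined elsewhere in app.py;
    # here it simply loads the ALBERT tokenizer.
    return AlbertTokenizer.from_pretrained(model_name)

if __name__ == '__main__':
    tokenizer = load_model('albert-xxlarge-v2')
    sent_cols = st.columns(2)
    num_tokens = {}
    sents = {}
    for sent_id, sent_col in enumerate(sent_cols):
        with sent_col:
            sentence = st.text_input(f'Sentence {sent_id+1}')
            sents[f'sent_{sent_id}'] = sentence
            if len(sentence) > 0:
                # Tokenize, drop the [CLS]/[SEP] ids, and render one column per token.
                input_sent = tokenizer(sentence)['input_ids']
                decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
                num_tokens[f'sent_{sent_id}'] = len(decoded_sent)
                char_nums = [len(word) + 2 for word in decoded_sent]
                word_cols = st.columns(char_nums)
                for word_col, word in zip(word_cols, decoded_sent):
                    with word_col:
                        st.write(word)
                st.write(f'{len(decoded_sent)} tokens')

    # enumerate() is 0-based, so this sketch keys the dicts as 'sent_0'/'sent_1';
    # the committed hunk reads 'sent_1'/'sent_2' instead.
    if len(sents['sent_0']) > 0 and len(sents['sent_1']) > 0:
        if num_tokens['sent_0'] == num_tokens['sent_1']:
            st.subheader('Matched!')
        else:
            st.subheader('Not Matched...')
```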