taka-yamakoshi committed on
Commit
ed9112c
1 Parent(s): 9240bf4

add instructions

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -98,15 +98,19 @@ if __name__=='__main__':
98
  st.markdown(generate_markdown('quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
99
 
100
  # Select and load the tokenizer
101
- tokenizer_name = st.sidebar.selectbox('Choose the tokenizer from below',
102
  ('bert-base-uncased','bert-large-cased',
103
  'gpt2','gpt2-large',
104
  'roberta-base','roberta-large',
105
  'albert-base-v2','albert-xxlarge-v2'),index=7)
106
  tokenizer = load_model(tokenizer_name)
107
 
 
 
 
 
108
  comparison_mode = st.sidebar.checkbox('Compare two texts')
109
- detokenize = st.sidebar.checkbox('de-tokenize (make sure to type in integers separated by single spaces)')
110
  if comparison_mode:
111
  sent_cols = st.columns(2)
112
  num_tokens = {}
@@ -122,7 +126,7 @@ if __name__=='__main__':
122
  sents[f'sent_{sent_id+1}'] = sentence
123
 
124
  if len(sents['sent_1'])>0 and len(sents['sent_2'])>0:
125
- st.markdown(generate_markdown('Result: ',size=16), unsafe_allow_html=True)
126
  if num_tokens[f'sent_1']==num_tokens[f'sent_2']:
127
  st.markdown(generate_markdown('Matched! ',color='MediumAquamarine'), unsafe_allow_html=True)
128
  else:
 
98
  st.markdown(generate_markdown('quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
99
 
100
  # Select and load the tokenizer
101
+ tokenizer_name = st.sidebar.selectbox('1. Choose the tokenizer from below',
102
  ('bert-base-uncased','bert-large-cased',
103
  'gpt2','gpt2-large',
104
  'roberta-base','roberta-large',
105
  'albert-base-v2','albert-xxlarge-v2'),index=7)
106
  tokenizer = load_model(tokenizer_name)
107
 
108
+ st.sidebar.write('2. Optional settings')
109
+ st.sidebar.write(f'"Compare two texts" compares # tokens for two pieces of text '\
110
+ +f'and "de-tokenize" converts a list of tokenized indices back to strings.')
111
+ st.sidebar.write(f'For "de-tokenize", make sure to type in integers, separated by single spaces')
112
  comparison_mode = st.sidebar.checkbox('Compare two texts')
113
+ detokenize = st.sidebar.checkbox('de-tokenize')
114
  if comparison_mode:
115
  sent_cols = st.columns(2)
116
  num_tokens = {}
 
126
  sents[f'sent_{sent_id+1}'] = sentence
127
 
128
  if len(sents['sent_1'])>0 and len(sents['sent_2'])>0:
129
+ st.markdown(generate_markdown('# Tokens: ',size=16), unsafe_allow_html=True)
130
  if num_tokens[f'sent_1']==num_tokens[f'sent_2']:
131
  st.markdown(generate_markdown('Matched! ',color='MediumAquamarine'), unsafe_allow_html=True)
132
  else: