taka-yamakoshi committed on
Commit a999c8e
1 Parent(s): 8a204f8

fix a bug & minor update

Files changed (1)
  1. app.py +13 -10
app.py CHANGED
@@ -36,8 +36,10 @@ def TokenizeText(sentence):
     #for word_col,word in zip(word_cols,decoded_sent):
     #  with word_col:
     #    st.write(word)
-    st.write(' '.join(encoded_sent))
-    st.write(' '.join(decoded_sent))
+    #st.write(' '.join(encoded_sent))
+    #st.write(' '.join(decoded_sent))
+    st.markdown(generate_markdown(' '.join(encoded_sent),size=16), unsafe_allow_html=True)
+    st.markdown(generate_markdown(' '.join(decoded_sent),size=16), unsafe_allow_html=True)
     st.markdown(generate_markdown(f'{num_tokens} tokens'), unsafe_allow_html=True)
 
     return num_tokens
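Note: this hunk swaps the plain `st.write` calls for `st.markdown(generate_markdown(...))`, so the encoded IDs and decoded tokens are rendered with the same styled sizing as the token count. `generate_markdown` and the tokenization itself are defined outside this hunk; a minimal sketch of the updated `TokenizeText`, assuming the helper wraps its argument in an inline-styled `<span>` and that `tokenizer` is the module-level tokenizer loaded in `__main__`:

```python
import streamlit as st

def generate_markdown(text, size=20):
    # Hypothetical stand-in: the real helper is defined elsewhere in app.py.
    return f"<span style='font-size:{size}px'>{text}</span>"

def TokenizeText(sentence):
    # Assumed encode/decode logic: map the text to IDs, then decode each ID
    # back to its surface token so the two rendered lines align one-to-one.
    encoded_sent = [str(token_id) for token_id in tokenizer(sentence)['input_ids']]
    decoded_sent = [tokenizer.decode(int(token_id)) for token_id in encoded_sent]
    num_tokens = len(decoded_sent)
    # The commit's change: styled markdown instead of bare st.write.
    st.markdown(generate_markdown(' '.join(encoded_sent), size=16), unsafe_allow_html=True)
    st.markdown(generate_markdown(' '.join(decoded_sent), size=16), unsafe_allow_html=True)
    st.markdown(generate_markdown(f'{num_tokens} tokens'), unsafe_allow_html=True)
    return num_tokens
```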
@@ -73,17 +75,18 @@ if __name__=='__main__':
     st.markdown(hide_table_row_index, unsafe_allow_html=True)
 
     # Title
-    st.markdown(generate_markdown('Tokenizer Demo',size=32), unsafe_allow_html=True)
+    st.markdown(generate_markdown('Tokenizer Demo:',size=32), unsafe_allow_html=True)
+    st.markdown(generate_markdown('Quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
 
     # Select and load the tokenizer
-    tokenizer_name = st.selectbox('Choose the tokenizer from below',
-                                  ('bert-base-uncased','bert-large-cased',
-                                   'gpt2','gpt2-large',
-                                   'roberta-base','roberta-large',
-                                   'albert-base-v2','albert-xxlarge-v2'),index=7)
+    tokenizer_name = st.sidebar.selectbox('Choose the tokenizer from below',
+                                          ('bert-base-uncased','bert-large-cased',
+                                           'gpt2','gpt2-large',
+                                           'roberta-base','roberta-large',
+                                           'albert-base-v2','albert-xxlarge-v2'),index=7)
     tokenizer = load_model(tokenizer_name)
 
-    comparison_mode = st.checkbox('Compare two texts')
+    comparison_mode = st.sidebar.checkbox('Compare two texts')
     if comparison_mode:
         sent_cols = st.columns(2)
         num_tokens = {}
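Note: this hunk moves both controls into the sidebar; `st.sidebar.selectbox` and `st.sidebar.checkbox` accept the same arguments as their main-page counterparts, so only the call site changes. `load_model` is also defined outside the diff; a plausible minimal version, assuming it wraps `AutoTokenizer.from_pretrained` behind Streamlit's cache so the tokenizer files are not re-fetched on every rerun:

```python
import streamlit as st
from transformers import AutoTokenizer

@st.cache(allow_output_mutation=True)
def load_model(tokenizer_name):
    # Assumed implementation: cache the tokenizer object across reruns.
    return AutoTokenizer.from_pretrained(tokenizer_name)

tokenizer = load_model('albert-xxlarge-v2')  # index=7 default in the selectbox
```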
@@ -92,7 +95,7 @@ if __name__=='__main__':
         with sent_col:
             sentence = st.text_input(f'Text {sent_id+1}')
             sents[f'sent_{sent_id+1}'] = sentence
-            num_tokens[f'{sent_id+1}'] = TokenizeText(sentence)
+            num_tokens[f'sent_{sent_id+1}'] = TokenizeText(sentence)
 
     if len(sents['sent_1'])>0 and len(sents['sent_2'])>0:
         st.markdown(generate_markdown('Result: ',size=16), unsafe_allow_html=True)
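Note: this last hunk is the bug named in the commit message. `sents` is keyed `'sent_1'`/`'sent_2'` while `num_tokens` was keyed `'1'`/`'2'`, so any downstream lookup that mirrors the `sents` keys would raise a `KeyError`. A small illustration with made-up token counts (the downstream lookup itself is not shown in this diff):

```python
sents = {'sent_1': 'hello', 'sent_2': 'world'}

# Before the fix the counts were keyed '1'/'2' ...
num_tokens_before = {'1': 3, '2': 3}
assert 'sent_1' not in num_tokens_before  # indexing with sents' keys would fail

# ... after the fix both dicts share keys and can be joined per text:
num_tokens_after = {'sent_1': 3, 'sent_2': 3}
for key in sents:
    print(key, sents[key], num_tokens_after[key])
```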
 