Annorita committed on
Commit
ae7ad9f
1 Parent(s): 224f5e0

add model option

Browse files
Files changed (2) hide show
  1. app.py +38 -13
  2. utils.py +1 -1
app.py CHANGED
@@ -1,31 +1,56 @@
1
  import streamlit as st
2
  from utils import get_res
3
 
 
4
  st.sidebar.title('Tokenizers demo')
5
 
6
  #x = st.slider('Select a value')
7
  #st.write(x, 'squared is', x * x)
8
 
9
- st.sidebar.subheader('Choose the tokenizer', divider='grey')
10
- option = st.sidebar.selectbox(
11
- 'model_name',
12
- ['deepseek-ai/deepseek-coder-1.3b-instruct',
13
- 'bigcode/starcoder'])
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- model_name = st.sidebar.text_input('Model Name', 'deepseek-ai/deepseek-coder-1.3b-instruct')
16
 
17
  #'Your choice:', model_name
18
 
19
  st.sidebar.subheader('Write the input sentence', divider='grey')
20
- input_data = st.sidebar.text_input('Input Sentence', 'Hello world!!!')
 
 
 
 
 
 
 
21
 
 
 
22
 
23
- res, token_num = get_res(model_name=model_name, input_sentence=input_data, single_print=False)
 
 
24
 
25
- #st.markdown('<style></style>')
 
 
26
 
27
- st.subheader('Tokenized result', divider='grey')
28
- st.markdown(res, unsafe_allow_html=True)
29
 
30
- st.subheader('Number of tokens', divider='grey')
31
- st.write(token_num)
 
 
1
import streamlit as st
from utils import get_res


# Side-by-side tokenizer comparison demo: the user picks two model names in
# the sidebar, types a sentence, and each column shows how that model's
# tokenizer splits the sentence plus the resulting token count.

st.sidebar.title('Tokenizers demo')

# Models whose tokenizers are known to work well with this demo; shown as a
# hint list so the user can copy a name into the text inputs above.
model_name_A = st.sidebar.text_input('Model Name A', 'deepseek-ai/deepseek-coder-1.3b-instruct')
model_name_B = st.sidebar.text_input('Model Name B', 'deepseek-ai/deepseek-coder-1.3b-instruct')

model_option = ['deepseek-ai/deepseek-coder-1.3b-instruct',
                'MediaTek-Research/Breeze-7B-Instruct-64k-v0_1',
                'microsoft/phi-2']

with st.sidebar.expander("Models that you might want"):
    for m in model_option:
        st.write(m)

st.sidebar.subheader('Write the input sentence', divider='grey')
input_data = st.sidebar.text_input('Input Sentence', 'Hello sunshine!!!')


def _render_tokenizer_column(model_name):
    """Render one comparison column: tokenized markup and token count.

    Parameters
    ----------
    model_name : str
        Hugging Face model id whose tokenizer is applied to ``input_data``.
    """
    st.subheader(model_name, divider='grey')
    # get_res returns (html_markup, token_count); single_print=False
    # suppresses its console output.
    res, token_num = get_res(model_name=model_name, input_sentence=input_data, single_print=False)

    st.subheader('Tokenized result')
    # res is colour-coded HTML spans, so raw HTML must be allowed.
    st.markdown(res, unsafe_allow_html=True)

    st.subheader('Number of tokens')
    st.markdown(f'<span style="font-size:1.875em">{str(token_num)}</span>',
                unsafe_allow_html=True)


# Two equal-width columns, one per model, rendered with the same helper to
# keep the layouts identical.
col1, col2 = st.columns(2)

with col1:
    _render_tokenizer_column(model_name_A)

with col2:
    _render_tokenizer_column(model_name_B)
utils.py CHANGED
@@ -16,7 +16,7 @@ def get_res(model_name, input_sentence, single_print=True):
16
  out = tokenizer.encode(input_sentence, add_special_tokens=False)
17
  token_num = len(out)
18
 
19
- w = [ f'<span style="background-color:{next(color_iterator)}">{tokenizer.decode(x)}</span>' for x in out ]
20
  res = ''.join(w)
21
  if single_print:
22
  print(res + str(token_num))
 
16
  out = tokenizer.encode(input_sentence, add_special_tokens=False)
17
  token_num = len(out)
18
 
19
+ w = [ f'<span style="font-size:1.25em;background-color:{next(color_iterator)}">{tokenizer.decode(x)}</span>' for x in out ]
20
  res = ''.join(w)
21
  if single_print:
22
  print(res + str(token_num))