Spaces:
Sleeping
Sleeping
File size: 2,588 Bytes
5ff29be 35996ec 5ff29be ae7ad9f ab98424 6893866 35996ec 96a0e76 ae7ad9f 7cf6c21 19898a1 1d128e9 19898a1 ae7ad9f 19898a1 96a0e76 1459d42 ab98424 1459d42 19898a1 ae7ad9f 35996ec ae7ad9f 35996ec ae7ad9f 35996ec ae7ad9f 1c58aa1 ae7ad9f 224f5e0 ae7ad9f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import streamlit as st
from utils import get_res
# Heading shown at the top of the sidebar.
st.sidebar.title('Tokenizers demo')

# Model names offered in the sidebar drop-downs. The final entry,
# 'enter by myself', is the sentinel the script compares against to swap the
# drop-down value for a free-text field.
model_option = [
    'deepseek-ai/deepseek-coder-1.3b-instruct',
    'MediaTek-Research/Breeze-7B-Instruct-64k-v0_1',
    'microsoft/phi-2',
    'enter by myself',
]

# Sample sentences offered in the sidebar: a decimal number, English text
# with repeated punctuation, and one sentence written in both Traditional
# and Simplified Chinese. The same 'enter by myself' sentinel closes the list.
input_option = [
    '123.5',
    'hello world!!!',
    '大雨+寒流來襲!全台極凍72小時「探5度以下」',
    '大雨+寒流来袭!全台极冻72小时「探5度以下」',
    'enter by myself',
]
# Sidebar section where the two model names to compare are chosen.
st.sidebar.subheader('Choose the tokenizer', divider='grey')
st.sidebar.write('You can choose `enter by myself` to paste the model you want.')


def _pick_model_name(slot):
    """Return the model name selected in the sidebar for comparison slot *slot*.

    A drop-down labelled ``Model Name <slot>`` lists the entries of
    ``model_option``. When the ``'enter by myself'`` entry is picked, a text
    field labelled ``Please enter Model Name <slot>`` is shown instead and its
    content (pre-filled with a default model name) is returned.

    Args:
        slot: Label suffix distinguishing the widgets, e.g. ``'A'`` or ``'B'``.
            It also keeps the widget labels unique between the two slots.

    Returns:
        The chosen or typed model name as a string.
    """
    chosen = st.sidebar.selectbox(f'Model Name {slot}', model_option)
    if chosen == 'enter by myself':
        chosen = st.sidebar.text_input(
            f'Please enter Model Name {slot}',
            'deepseek-ai/deepseek-coder-1.3b-instruct',
        )
        # A pasted name easily carries stray leading/trailing whitespace;
        # it is assumed never to be a meaningful part of a model name, so
        # drop it rather than pass it on to the tokenizer lookup.
        chosen = chosen.strip()
    return chosen


# Widgets are created in the same order as before: A first, then B.
model_name_A = _pick_model_name('A')
model_name_B = _pick_model_name('B')
# Sidebar section for the sentence that is handed to get_res for both models.
st.sidebar.subheader('Choose the input sentence', divider='grey')
st.sidebar.write('You can choose `enter by myself` to enter the text you want.')

_picked_sentence = st.sidebar.selectbox('Input Sentence', input_option)

# The sentinel entry swaps the preset for a free-text field; the text is used
# exactly as typed (no trimming), so whitespace stays part of the input.
input_data = (
    st.sidebar.text_input('Write the Input Sentence', 'Hello sunshine!!!')
    if _picked_sentence == 'enter by myself'
    else _picked_sentence
)
def _show_tokenization(column, model_name, sentence):
    """Render how *model_name* tokenizes *sentence* inside *column*.

    The column gets a heading with the model name, the markup returned by
    ``get_res`` under 'Tokenized result', and the token count under
    'Number of tokens'.

    Args:
        column: A Streamlit container (one of the objects returned by
            ``st.columns``) to draw into.
        model_name: Model name forwarded to ``get_res``.
        sentence: Input text forwarded to ``get_res``.

    Returns:
        None. If ``get_res`` raises, the exception is displayed in the column
        and the result sections are skipped.
    """
    with column:
        st.subheader(model_name, divider='grey')
        try:
            res, token_num = get_res(
                model_name=model_name,
                input_sentence=sentence,
                single_print=False,
            )
        except Exception as err:
            # UI boundary: the model name may be free text typed by the user,
            # so a failed lookup is an expected outcome. Show it in this
            # column instead of aborting the script, so the other column
            # still renders.
            st.exception(err)
            return
        st.subheader('Tokenized result')
        # NOTE(review): `res` is rendered as raw HTML. Confirm that get_res
        # escapes the user-supplied sentence before embedding it in markup.
        st.markdown(res, unsafe_allow_html=True)
        st.subheader('Number of tokens')
        st.markdown(
            f'<span style="font-size:1.875em">{token_num}</span>',
            unsafe_allow_html=True,
        )


# Side-by-side comparison: model A on the left, model B on the right.
col1, col2 = st.columns(2)
_show_tokenization(col1, model_name_A, input_data)
_show_tokenization(col2, model_name_B, input_data)
|