keshan's picture
updating sidebar
77b63e6
raw history blame
No virus
2.11 kB
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# from huggingface_hub import snapshot_download
# Sidebar selector deciding which model variant the page demonstrates; the
# first entry is what the selectbox shows by default.
model_choices = ["Finetuned on News data", "Pretrained GPT2"]
page = st.sidebar.selectbox("Model ", model_choices)
# Streamlit re-runs this whole script on every widget interaction, so the
# expensive ``from_pretrained`` calls are kept behind a resource cache.
# Newer Streamlit releases expose ``st.cache_resource``; older ones only
# offer ``st.cache``, which needs ``allow_output_mutation=True`` so the
# returned model objects are not hashed on every call.
if hasattr(st, 'cache_resource'):
    _cache_model = st.cache_resource(show_spinner=False)
else:
    _cache_model = st.cache(allow_output_mutation=True, show_spinner=False)


@_cache_model
def _load_pretrained(model_name):
    """Instantiate the model and tokenizer for *model_name* (cached per name)."""
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def load_model(model_name):
    """Return the causal-LM model and tokenizer for *model_name*.

    A spinner is shown while loading and a success message afterwards.
    The underlying objects come from a cache, so repeated script reruns with
    the same ``model_name`` reuse the already-loaded instances instead of
    instantiating them again.

    Args:
        model_name: Identifier passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    with st.spinner('Waiting for the model to load.....'):
        model, tokenizer = _load_pretrained(model_name)
    st.success('Model loaded!!')
    return model, tokenizer
# --- Sidebar controls -------------------------------------------------------
seed = st.sidebar.text_input('Starting text', 'ආයුබෝවන්')
# number_input arguments are (label, min_value, max_value, default).
seq_num = st.sidebar.number_input('Number of sentences to generate ', 1, 20, 5)
max_len = st.sidebar.number_input('Length of the sentence ', 5, 300, 100)
gen_bt = st.sidebar.button('Generate')

# --- Page selection ---------------------------------------------------------
# The two pages differ only in heading, description and model id, so pick
# those here and share a single generation path below.
if page == 'Pretrained GPT2':
    page_title = 'Sinhala Text generation with GPT2'
    page_description = 'A simple demo using Sinhala-gpt2 model trained during hf-flax week'
    model_id = 'flax-community/Sinhala-gpt2'
else:
    page_title = 'Sinhala Text generation with Finetuned GPT2'
    page_description = 'This model has been finetuned Sinhala-gpt2 model with 6000 news articles(~12MB)'
    model_id = 'keshan/sinhala-gpt2-newswire'

st.title(page_title)
st.markdown(page_description)
model, tokenizer = load_model(model_id)

# --- Generation -------------------------------------------------------------
if gen_bt:
    try:
        with st.spinner('Generating...'):
            generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
            seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
        st.write(seqs)
    except Exception as e:
        # Top-level UI boundary: surface any failure in the page rather than
        # crashing the app. Pass the exception object itself (not a formatted
        # string) so Streamlit can render its real type and traceback.
        st.exception(e)

# --- Footer (rendered on both pages) ----------------------------------------
st.markdown('____________')
st.markdown('by Keshan with Flax Community')