File size: 2,376 Bytes
f8987ba
 
d6f4621
f8987ba
d6f4621
f8987ba
d6f4621
 
 
 
 
 
 
f8987ba
d6f4621
 
 
8f192a0
d6f4621
 
 
 
 
 
 
 
f8987ba
d6f4621
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8987ba
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# from huggingface_hub import snapshot_download

# Sidebar selector for which GPT2 variant the demo should run.
MODEL_CHOICES = ["Pretrained GPT2", "Finetuned on News data"]
page = st.sidebar.selectbox("Model ", MODEL_CHOICES)

@st.cache(allow_output_mutation=True)
def load_model(model_name):
    """Load and return a ``(model, tokenizer)`` pair for *model_name*.

    Cached by Streamlit so the heavyweight model download/deserialization
    happens once per model name instead of on every rerun triggered by a
    widget interaction. ``allow_output_mutation=True`` because HF model
    objects are mutable and should not be re-hashed on each access.

    Args:
        model_name: Hugging Face model hub identifier.

    Returns:
        Tuple of (AutoModelForCausalLM, AutoTokenizer).
    """
    with st.spinner('Waiting for the model to load.....'):
        # snapshot_download('flax-community/Sinhala-gpt2')
        model = AutoModelForCausalLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    st.success('Model loaded!!')
    return model, tokenizer

# Shared sidebar controls: starting text plus generation-size parameters.
seed = st.sidebar.text_input('Starting text', 'ආයුබෝවන්')
seq_num = st.sidebar.number_input('Number of sentences to generate ', 1, 20, 5)
max_len = st.sidebar.number_input('Length of the sentence ', 5, 300, 100)


def _generate_and_display(model_name):
    """Render the Generate button for *model_name* and show the output.

    Loads the model/tokenizer, and when the button is pressed runs a
    text-generation pipeline seeded with the sidebar inputs (``seed``,
    ``max_len``, ``seq_num``) and writes the sequences to the page.
    """
    clicked = st.button('Generate')
    model, tokenizer = load_model(model_name)

    if clicked:
        try:
            with st.spinner('Generating...'):
                generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
                seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
            st.write(seqs)
        except Exception as e:
            # st.exception expects an exception object (it renders the
            # traceback), not a pre-formatted string.
            st.exception(e)


if page == "Finetuned on News data":
    st.title('Sinhala Text generation with Finetuned GPT2')
    st.markdown('This model has been finetuned Sinhala-gpt2 model with 6000 news articles(~12MB)')
    _generate_and_display('keshan/sinhala-gpt2-newswire')
else:
    st.title('Sinhala Text generation with GPT2')
    st.markdown('A simple demo using Sinhala-gpt2 model trained during hf-flax week')
    _generate_and_display('flax-community/Sinhala-gpt2')


st.markdown('____________')
st.markdown('by Keshan with Flax Community')