""" Script for streamlit demo @author: AbinayaM02 """ # Install necessary libraries from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline import streamlit as st from pprint import pprint import json # Read the config with open("config.json") as f: config = json.loads(f.read()) # Set page layout st.set_page_config( page_title="Tamil Language Models", layout="wide", initial_sidebar_state="expanded" ) # Load the model @st.cache(allow_output_mutation=True) def load_model(model_name): with st.spinner('Waiting for the model to load.....'): model = AutoModelWithLMHead.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) st.success('Model loaded!!') return model, tokenizer # Side bar img = st.sidebar.image("images/tamil_logo.jpg", width=380) # Choose the model based on selection page = st.sidebar.selectbox("Model", config["models"]) data = st.sidebar.selectbox("Data", config[page]) # Main page st.header("Tamil Language Demos") st.markdown( "This demo uses [GPT2 trained on Oscar dataset](https://huggingface.co/flax-community/gpt-2-tamil) " "and [GPT2 trained on Oscar & Indic Corpus dataset] (https://huggingface.co/abinayam/gpt-2-tamil) " "to show language generation" ) if page == 'Text Generation' and data == 'Oscar': st.title('Tamil text generation with GPT2') st.markdown('A simple demo using gpt-2-tamil model trained on Oscar data') model, tokenizer = load_model(config[data]) # Set default options seed = st.text_input('Starting text', 'அகர முதல எழுதெல்லம்') #seq_num = st.number_input('Number of sentences to generate ', 1, 20, 5) max_len = st.number_input('Length of the sentence', 5, 300, 100) gen_bt = st.button('Generate') if gen_bt: try: with st.spinner('Generating...'): generator = pipeline('text-generation', model=model, tokenizer=tokenizer) seqs = generator(seed, max_length=max_len) # num_return_sequences=seq_num) st.write(seqs) except Exception as e: st.exception(f'Exception: {e}') elif page == 'Text Generation' and data == "Oscar + Indic Corpus": st.title('Tamil text generation with GPT2') st.markdown('A simple demo using gpt-2-tamil model trained on Oscar data') model, tokenizer = load_model(config[data]) # Set default options seed = st.text_input('Starting text', 'அகர முதல எழுதெல்லம்') #seq_num = st.number_input('Number of sentences to generate ', 1, 20, 5) max_len = st.number_input('Length of the sentence', 5, 300, 100) gen_bt = st.button('Generate') if gen_bt: try: with st.spinner('Generating...'): generator = pipeline('text-generation', model=model, tokenizer=tokenizer) seqs = generator(seed, max_length=max_len) #num_return_sequences=seq_num) st.write(seqs) except Exception as e: st.exception(f'Exception: {e}') else: st.title('Tamil News classification with Finetuned GPT2') st.markdown('In progress')