"""Streamlit demo for SpellCorrectorT5.

Lets the user pick or type a sentence, samples corrected candidates from the
``vishnun/tinygram`` seq2seq checkpoint, and renders each candidate with the
words that do not occur in the input shown in bold.
"""

import html

import streamlit as st
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

st.title("SpellCorrectorT5")
st.markdown(
    "SpellCorrectorT5 is a fine-tuned version of **pre-trained t5-small model** "
    "modelled on randomly selected 50000 sentences modified by "
    "[imputing random noises/errors](./random_noiser.py) and trained using "
    "transformers. It not only looks for _spelling errors but also looks for "
    "the semantics_ in the sentence and suggest other possible words for the "
    "incorrect word."
)

m_name = "vishnun/tinygram"


def _load_model(name):
    """Return the ``(tokenizer, model)`` pair for checkpoint *name*."""
    return (
        AutoTokenizer.from_pretrained(name),
        AutoModelForSeq2SeqLM.from_pretrained(name),
    )


# Streamlit re-executes this script on every widget interaction; without
# caching the checkpoint would be reloaded each time.  ``st.cache_resource``
# is only present in newer Streamlit releases, so fall back to plain
# (uncached) loading when it is unavailable.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_model = _cache_resource(_load_model)

ttokenizer, tmodel = _load_model(m_name)


def _highlight_corrections(corrected, original):
    """Return an HTML snippet of *corrected* with changed words in bold.

    Both sentences are lower-cased and split on single spaces.  A word of
    *corrected* that does not occur among the words of *original* is wrapped
    in ``<strong>``.  The first word is capitalised before any markup is
    added, so the sentence still starts with a capital when that word is
    highlighted.  Every word is HTML-escaped because the result is rendered
    with ``unsafe_allow_html=True``.
    """
    # Build the lookup once instead of re-splitting the input for every word.
    known_words = set(original.lower().split(" "))
    pieces = []
    for position, word in enumerate(corrected.lower().split(" ")):
        shown = html.escape(word.capitalize() if position == 0 else word)
        if word in known_words:
            pieces.append(shown)
        else:
            pieces.append("<strong>" + shown + "</strong>")
    return " ".join(pieces)


form = st.form("T5-form")
examples = [
    "I will return it to yu once it is donr",
    "Iu is going to rain",
    "Wheir do you live?",
    "It wis great mieting with you all",
]
input_text = form.selectbox(label="Choose an example", options=examples)
form.write("(or)")
# The free-text field is pre-filled with the chosen example and, being
# assigned last, is the value that is actually corrected.
input_text = form.text_input(label='Enter your own sentence', value=input_text)
submit = form.form_submit_button("Submit")

if submit:
    if not input_text.strip():
        # Nothing to correct; skip generation on a blank submission.
        st.warning("Please enter a sentence to correct.")
    else:
        input_ids = ttokenizer.encode(input_text, return_tensors='pt')

        # Sample two candidate corrections (top-k / nucleus sampling), each
        # limited to max_length=50 tokens.
        outputs = tmodel.generate(
            input_ids,
            do_sample=True,
            max_length=50,
            top_p=0.999,
            top_k=45,
            num_return_sequences=2,
        )

        st.subheader("Most probable: ")
        for y in outputs:
            out_text = ttokenizer.decode(y, skip_special_tokens=True)
            st.success(out_text.capitalize())
            st.markdown(
                _highlight_corrections(out_text, input_text),
                unsafe_allow_html=True,
            )
            st.markdown("***", unsafe_allow_html=True)