Spaces:
Runtime error
Runtime error
from transformers import PreTrainedTokenizerFast | |
from tokenizers import SentencePieceBPETokenizer | |
from transformers import BartForConditionalGeneration | |
import streamlit as st | |
import torch | |
import random | |
def tokenizer():
    """Load and return the fast tokenizer published as 'Soyoung97/gec_kr'.

    Every call goes through ``from_pretrained`` again; nothing is cached here.
    """
    return PreTrainedTokenizerFast.from_pretrained('Soyoung97/gec_kr')
def get_model():
    """Load the 'Soyoung97/gec_kr' BART checkpoint and return it in eval mode.

    Every call goes through ``from_pretrained`` again; nothing is cached here.
    """
    # ``eval()`` switches the module to inference mode and returns the module
    # itself, so the loaded model can be returned directly.
    return BartForConditionalGeneration.from_pretrained('Soyoung97/gec_kr').eval()
# Streamlit demo page: read one sentence, run it through the GEC model, and
# show the corrected text.

# Initial contents of the input box.
# NOTE(review): this literal (and the examples below) look mis-encoded --
# presumably Korean text decoded with the wrong charset. They are kept
# unchanged here; confirm against the original file.
default_text = 'νκ΅μ΄λ μ νν λ무 μ΄λ €μ΄ μΈμ΄μ΄μμ΄μ.'

model = get_model()
# From here on `tokenizer` names the loaded tokenizer object, not the loader
# function defined above.
tokenizer = tokenizer()

st.title("Grammatical Error Correction for Korean: Demo")
text = st.text_input("Input corrupted sentence :", value=default_text)

# Sample inputs suggested by the "try another example" button.
default_text_list = [
    'νκ΅μ΄λ μ νν λ무 μ΄λ €μ΄ μΈμ΄μ΄μμ΄μ.',
    'μ λ νκ΅λ§ λ°°μ μνμ΄μ.',
    'λ©λ¨Έμ΄λ κ·μ½λ€',
    'λνμμμ΄λ €!',
    'μμ§μ¨κ° μμ©λκΉ?',
    'μ§λλ μΈνλ·μΌλ‘ μ°Ύμλλ€.',
    'κ·Έ μ κΏμ΄ κ΅μκΈ° λλ κ²μ λλ€',
]
if st.button("try another example: "):
    # Only displays a suggestion; the input box itself is not modified.
    text_button = random.choice(default_text_list)
    try_this = f"Try this text! : {text_button}"
    st.write(try_this)

st.markdown("## Original sentence:")
st.write(text)

# Skip generation for empty or whitespace-only input.
if text.strip():
    st.markdown("## Corrected output")
    with st.spinner('processing..'):
        raw_input_ids = tokenizer.encode(text)

        # Leave room for the BOS/EOS ids added below so the encoder input
        # never exceeds the model's positional-embedding table.
        max_body_len = model.config.max_position_embeddings - 2
        if len(raw_input_ids) > max_body_len:
            st.warning('Input is too long and was truncated before correction.')
            raw_input_ids = raw_input_ids[:max_body_len]

        input_ids = (
            [tokenizer.bos_token_id] + raw_input_ids + [tokenizer.eos_token_id]
        )
        corrected_ids = model.generate(
            torch.tensor([input_ids]),
            max_length=256,
            # NOTE(review): hard-coded id; presumably equals
            # tokenizer.eos_token_id -- confirm.
            eos_token_id=1,
            num_beams=4,
            early_stopping=True,
            repetition_penalty=2.0,
        )
        # Index the single batch row instead of squeeze(): squeeze() would
        # collapse a one-token result to a 0-d tensor whose tolist() is a
        # bare int rather than a list of ids.
        output = tokenizer.decode(corrected_ids[0].tolist(), skip_special_tokens=True)
    if not output:
        output = 'Nothing generated...TT Please try again with different text!'
    st.write(output)