Spaces:
Runtime error
Runtime error
import os | |
from typing import List | |
import streamlit as st | |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
# Set before any tokenizer is created. Presumably this silences the Hugging
# Face `tokenizers` warning about parallelism in forked processes -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
def load_model(model_name):
    """Load the tokenizer and seq2seq model registered under ``model_name``.

    Args:
        model_name: Identifier passed unchanged to both ``from_pretrained``
            calls.

    Returns:
        A ``(model, tokenizer)`` tuple -- the model comes first.
    """
    # The tokenizer is fetched first, then the model, as before.
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    return AutoModelForSeq2SeqLM.from_pretrained(model_name), loaded_tokenizer
def paraphrase(
    model,
    encoding,
    top_k=120,
    top_p=0.95,
    max_len=120,
    tokenizer=None,
    num_return_sequences=5,
) -> List[str]:
    """Sample paraphrases from ``model`` and decode them to plain text.

    Args:
        model: Object exposing ``generate(...)``.
        encoding: Mapping with ``"input_ids"`` and ``"attention_mask"``
            entries, both forwarded to ``model.generate``.
        top_k: Forwarded to ``generate`` as ``top_k``.
        top_p: Forwarded to ``generate`` as ``top_p``.
        max_len: Forwarded to ``generate`` as ``max_length``.
        tokenizer: Object exposing ``decode(...)`` used to turn each generated
            sequence into text. When omitted, a module-level ``tokenizer``
            is used, which is how this function was originally wired.
        num_return_sequences: Number of sampled sequences to request.

    Returns:
        One decoded string per sequence returned by ``model.generate``.

    Raises:
        ValueError: If no tokenizer is passed and no module-level
            ``tokenizer`` exists.
    """
    if tokenizer is None:
        # Backward compatibility: existing callers rely on the script entry
        # point having bound a module-level ``tokenizer`` before calling.
        tokenizer = globals().get("tokenizer")
        if tokenizer is None:
            raise ValueError(
                "paraphrase() needs a tokenizer: pass tokenizer=... or define "
                "a module-level `tokenizer` first."
            )

    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        max_length=max_len,
        early_stopping=True,
        num_return_sequences=num_return_sequences,
    )
    return [
        tokenizer.decode(
            output, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )
        for output in outputs
    ]
if __name__ == "__main__":
    st.header("Indonesian Paraphrase Generation")
    # Current Streamlit releases reject text_area heights below 68 px, so the
    # previous height=30 fails at startup there; 68 is the smallest accepted.
    user_input = st.text_area("Original Sentence", "", height=68)

    # Sidebar sliders for the decoding settings passed to paraphrase().
    st.sidebar.header("Decoding Settings")
    # NOTE(review): a Max-Length of 0 is unlikely to be a usable generation
    # length -- confirm and consider raising the slider minimum.
    max_len = st.sidebar.slider("Max-Length", 0, 512, 256)
    top_k = st.sidebar.slider("Top-K", 0, 512, 200)
    top_p = st.sidebar.slider("Top-P", 0.0, 1.0, 0.95)

    clicked = st.button("Paraphrase")
    sentence = user_input.strip()
    if clicked and not sentence:
        # Previously a click with an empty box sent a bare prompt to the model.
        st.warning("Please enter a sentence to paraphrase.")
    elif sentence:
        # As before, any non-empty input triggers generation on every rerun
        # (for example after moving a slider), not only on a button click.
        with st.spinner("T5 is processing your text..."):
            # NOTE(review): the model is loaded again on every rerun that
            # reaches this branch; consider Streamlit's resource caching.
            # `tokenizer` must keep this exact module-level name because
            # paraphrase() looks it up as a global.
            model, tokenizer = load_model("Wikidepia/IndoT5-base-paraphrase")
            text = "paraphrase: " + sentence + " </s>"
            encode_id = tokenizer(text, return_tensors="pt")
            outputs = paraphrase(
                model, encode_id, top_k=top_k, top_p=top_p, max_len=max_len
            )
        st.markdown("### Hasil Parafrase")
        for output in outputs:
            st.markdown(f"- {output}")