# NOTE(review): the lines "Spaces:" / "Runtime error" below were Hugging Face
# Spaces page chrome scraped into this file — they are not Python code and
# have been commented out so the module parses.
"""Streamlit app: summarize a Korean article with a Korean T5 model.

Paste an article into the text area, tune the output length bounds in the
sidebar, and press "Summarize".  The first sentence of the generated summary
is shown as the result.
"""
import streamlit as st
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import nltk

# Sentence-tokenizer data; used below to pull the first sentence out of the
# generated summary.
nltk.download('punkt')

model_dir = "lcw99/t5-base-korean-text-summary"


@st.cache_resource
def _load_model(name):
    """Load tokenizer + model once.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the model would be re-instantiated (and potentially
    re-downloaded) on each rerun.
    """
    return AutoTokenizer.from_pretrained(name), AutoModelForSeq2SeqLM.from_pretrained(name)


tokenizer, model = _load_model(model_dir)

# Maximum number of input tokens fed to the model; longer articles are truncated.
max_input_length = 512

# NOTE(review): this sample text is mojibake (Korean UTF-8 decoded as Latin-1)
# from the original scrape — left byte-for-byte; restore the original Korean
# from the upstream source if available.
sample_text = """
μ£ΌμΈκ³΅ κ°μΈκ΅¬(νμ μ°)λ βμ리λ¨μμ νμ΄κ° λ§μ΄ λλλ° λ€ κ°λ€λ²λ¦°λ€βλ μΉκ΅¬
λ°μμ(νλ΄μ)μ μκΈ°λ₯Ό λ£κ³ μ리λ¨μ° νμ΄λ₯Ό νκ΅μ μμΆνκΈ° μν΄ μ리λ¨μΌλ‘ κ°λ€.
κ΅λ¦½μμ°κ³Όνμ μΈ‘μ βμ€μ λ‘ λ¨λμμμ νμ΄κ° λ§μ΄ μ΄κ³ μλ₯΄ν¨ν°λλ₯Ό λΉλ‘―ν λ¨λ―Έ κ΅κ°μμ νμ΄κ° λ§μ΄ μ‘νλ€βλ©°
βμλ¦¬λ¨ μ°μμλ νμ΄κ° λ§μ΄ μμν κ²βμ΄λΌκ³ μ€λͺ νλ€.
κ·Έλ¬λ κ΄μΈμ²μ λ°λ₯΄λ©΄ νκ΅μ μ리λ¨μ° νμ΄κ° μμ λ μ μ μλ€.
μΌκ°μμ βλμ λ²κΈ° μν΄ μ리λ¨μ° νμ΄λ₯Ό ꡬνλ¬ κ° μ€μ μ κ°μ°μ±μ΄ λ¨μ΄μ§λ€βλ μ§μ λ νλ€.
λλΌλ§ λ°°κ²½μ΄ λ 2008~2010λ μλ μ΄λ―Έ κ΅λ΄μ μλ₯΄ν¨ν°λ, μΉ λ , λ―Έκ΅ λ± μλ©λ¦¬μΉ΄μ° νμ΄κ° μμ λκ³ μμκΈ° λλ¬Έμ΄λ€.
μ€μ μ‘°λ΄ν μ²΄ν¬ μμ μ νμ‘°νλ βνλ ₯μ Kμ¨βλ νμ΄ μ¬μ μ΄ μλλΌ μ리λ¨μ μ λ°μ© νΉμμ©μ λ΄μ νλ μ¬μ μ νλ¬ μ리λ¨μ κ°μλ€.
"""

# --- UI -------------------------------------------------------------------
st.title("Summarize Text")
sentence = st.text_area('Please paste your Korean article :', value=sample_text, height=500)
button = st.button("Summarize")

# Generation controls (sidebar).
min_len = st.sidebar.slider('Select output min', 10, 450, step=10, value=50)
max_len = st.sidebar.slider('Select output max', 50, 500, step=10, value=150)
do_sample = st.sidebar.checkbox("Do sample", value=True)

if button and sentence:
    # Spinner only while actually generating — the original wrapped the whole
    # page render in the spinner context, so it flashed on every rerun even
    # before the button was pressed.
    with st.spinner("Generating Summary.."):
        # T5-style task prefix; truncate to the model's input budget.
        inputs = tokenizer(["summarize: " + sentence],
                           max_length=max_input_length,
                           truncation=True,
                           return_tensors="pt")
        output = model.generate(**inputs,
                                num_beams=8,
                                do_sample=do_sample,
                                min_length=min_len,
                                max_length=max_len)
        decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
        # Show only the first sentence of the summary.
        predicted_title = nltk.sent_tokenize(decoded_output.strip())[0]
    st.write(predicted_title)