Spaces:
Runtime error
Runtime error
from venv import create | |
import streamlit as st | |
from transformers import T5TokenizerFast, T5ForConditionalGeneration | |
from tfidf import tfidf, filter_paragraph | |
def remove_doc(i):
    """Remove the stored reference document at index ``i``.

    Passed as the ``on_click`` callback of a document's "X" button. Does
    nothing when ``st.session_state`` has no ``'docs'`` entry or when ``i``
    is not smaller than the number of stored documents.
    """
    if 'docs' not in st.session_state:
        return
    stored_docs = st.session_state['docs']
    if i < len(stored_docs):
        stored_docs.pop(i)
def split_sentences(paragraph):
    """Split ``paragraph`` into sentences on the ``' . '`` separator.

    The separator is a period with a single space on each side, so a period
    that touches a word (for example in ``"3.5"``) does not split.

    Args:
        paragraph: Text to split.

    Returns:
        A list of the non-empty fragments, in their original order. An
        empty paragraph yields an empty list.
    """
    # Drop empty / whitespace-only fragments: ''.split(' . ') returns [''],
    # and adjacent separators produce '' entries that carry no content.
    return [fragment for fragment in paragraph.split(' . ') if fragment.strip()]
# Page header: title followed by short usage instructions (the user-facing
# text is in Portuguese).
st.markdown('## Use o PLSUM para criar leads do Wikipedia automaticamente')
# The instructions name the "Criar resumo" button so that they match the
# label of the button that actually triggers the summary.
st.markdown('''
Crie resumos no estilo do wikipedia a partir de multiplos documentos.
Cole textos de referência no formulário a baixo e depois clique em "Criar resumo".
''')
# Populate the session state on the first run of a session. Each entry is
# created only when missing, so script reruns reuse the stored objects
# instead of loading them again.
if 'tokenizer' not in st.session_state:
    st.sidebar.info('Carregando o tokenizador')
    st.session_state['tokenizer'] = T5TokenizerFast.from_pretrained(
        "seidel/plsum-base-ptt5"
    )

if 'model' not in st.session_state:
    st.sidebar.info('Carregando o modelo')
    st.session_state['model'] = T5ForConditionalGeneration.from_pretrained(
        "seidel/plsum-base-ptt5",
        use_cache=True,
    )

# Reference documents added by the user during this session.
if 'docs' not in st.session_state:
    st.session_state['docs'] = []
# Form used to paste one reference document at a time. The text area is
# cleared after every submission (clear_on_submit=True).
with st.form("my_form", clear_on_submit=True):
    new_doc = st.text_area('Cole um documento de referência aqui')
    # A form needs a submit button; its return value is True on the run
    # triggered by the click.
    submitted = st.form_submit_button("Adicionar texto")
    if submitted:
        if not new_doc:
            # Nothing was typed: report it instead of storing an empty doc.
            st.error('Adicione algum texto')
        else:
            # Store the document after passing it through filter_paragraph.
            cleaned_doc = filter_paragraph(new_doc)
            st.session_state['docs'].append(cleaned_doc)
            st.info('Documento adicionado')
# Show every stored document in the sidebar: a wide column holding a
# collapsible preview next to a narrow column holding the remove button.
for position, doc_text in enumerate(st.session_state['docs']):
    preview_col, remove_col = st.sidebar.columns([8, 1])
    preview_col.expander('Documento {}'.format(position + 1)).caption(doc_text)
    # The key is unique per row; remove_doc receives the row index.
    remove_col.button(
        'X',
        key='remove_{}'.format(position),
        on_click=remove_doc,
        args=(position,),
    )
# Summary generation: read the title, and when the button is clicked build
# the model input from the stored documents and render the generated text.
query = st.text_input('Título do resumo')
create_summary = st.button('Criar resumo')
if create_summary:
    if not query:
        with st.sidebar:
            st.error('Adicione título para o resumo')
    elif not st.session_state['docs']:
        with st.sidebar:
            st.error('Adicione documentos de referência')
    else:
        with st.sidebar:
            st.info('Criando resumo')

        # Collect the sentences of all reference documents in one list.
        sentences = []
        for doc in st.session_state['docs']:
            sentences.extend(split_sentences(doc))

        # The TF-IDF selection was previously computed and then ignored: the
        # prompt was built from the unfiltered `sentences`. Use the result.
        # NOTE(review): assumes tfidf() returns an iterable of sentence
        # strings -- confirm against the tfidf module.
        filtered_sentences = tfidf(sentences, n_documents=7)

        # Prompt layout: task prefix, lower-cased title, then the selected
        # sentences joined by the '</s>' marker.
        input_text = 'summarize: {} </s> {}'.format(
            query.lower(), '</s>'.join(filtered_sentences)
        )
        # Pad / truncate the tokenised prompt to exactly 512 tokens.
        x = st.session_state['tokenizer'](
            input_text,
            padding="max_length",
            max_length=512,
            return_tensors="pt",
            truncation=True,
        )
        y = st.session_state['model'].generate(**x)
        summary = st.session_state['tokenizer'].batch_decode(
            y, skip_special_tokens=True
        )[0]

        st.markdown('#### {}'.format(query))
        st.markdown('{}'.format(summary))