"""Streamlit app: extract documents relevant to a user-supplied topic and
generate an abstractive summary from them.

Pipeline: semantic search (sentence-transformers) -> extraction (extractor)
-> summarization (t5-base via transformers).
"""

import cProfile
import time

import streamlit as st
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from extractor import FewDocumentsError, extract
from summarizer import summarize


# NOTE(review): st.cache(allow_output_mutation=True) is deprecated in recent
# Streamlit (st.cache_resource is the modern equivalent); kept as-is so the
# app still runs on the Streamlit version this project pins.
@st.cache(allow_output_mutation=True)
def init():
    """Load NLTK resources and the ML models once (cached by Streamlit).

    Returns:
        tuple: (search_model, summ_model, tokenizer) where
            search_model is a SentenceTransformer for semantic search,
            summ_model is a t5-base seq2seq model for summarization, and
            tokenizer is the matching t5-base tokenizer.
    """
    # Download required NLTK resources (tokenizer data and stopword lists)
    from nltk import download
    download('punkt')
    download('stopwords')

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Model for semantic searches
    search_model = SentenceTransformer('msmarco-distilbert-base-v4', device=device)

    # Model for abstraction
    summ_model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')
    tokenizer = AutoTokenizer.from_pretrained('t5-base')

    return search_model, summ_model, tokenizer


# TODO: translation
def _summarize_and_render(query, text, summ_model, tokenizer, start_time):
    """Summarize *text*, report total elapsed time, and render the result.

    Factored out because the exact same sequence ran in both the normal
    path and the 'Prosseguir' (proceed anyway) path of main().
    """
    with st.spinner('Gerando resumo...'):
        summary = summarize(text, summ_model, tokenizer)
    st.info(f'(Total) Elapsed time: {time.time() - start_time:.2f}s')
    st.markdown(f'Seu resumo para "{query}":\n\n> {summary}')


def main():
    """Streamlit entry point: render the UI and drive the extract/summarize flow."""
    search_model, summ_model, tokenizer = init()

    st.title("Trabalho de Formatura - Construindo textos para a internet")
    st.subheader("Lucas Antunes e Matheus Vieira")
    st.subheader("Digite o tópico sobre o qual você deseja gerar um resumo")

    query = st.text_input('Digite o tópico em inglês')  # text is stored in this variable

    # 'few_documents' persists across Streamlit reruns: it marks that the last
    # extraction found too few documents and the user must confirm to proceed.
    if 'few_documents' not in st.session_state:
        st.session_state['few_documents'] = False
        few_documents = False
    else:
        few_documents = st.session_state['few_documents']

    button1 = st.button('Gerar resumo')

    if button1:
        start_time = time.time()
        try:
            with st.spinner('Extraindo textos relevantes...'):
                text = extract(query, search_model=search_model)
        except FewDocumentsError as e:
            # Too few documents found: stash the partial results and the
            # warning message so the user can choose to proceed anyway on
            # the next rerun.
            few_documents = True
            st.session_state['few_documents'] = True
            st.session_state['documents'] = e.documents
            st.session_state['msg'] = e.msg
        else:
            st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')
            _summarize_and_render(query, text, summ_model, tokenizer, start_time)

    if few_documents:
        st.warning(st.session_state['msg'])
        if st.button('Prosseguir'):
            # User chose to proceed with the partial document set.
            start_time = time.time()
            with st.spinner('Extraindo textos relevantes...'):
                text = extract(
                    query,
                    search_model=search_model,
                    extracted_documents=st.session_state['documents'],
                )
            st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')
            _summarize_and_render(query, text, summ_model, tokenizer, start_time)

            st.session_state['few_documents'] = False
            few_documents = False


if __name__ == '__main__':
    # NOTE(review): cProfile writes binary pstats data despite the '.txt'
    # name; filename kept as-is in case external tooling reads 'stats.txt'.
    # Load it with pstats.Stats('stats.txt'), not a text editor.
    cProfile.run('main()', 'stats.txt')