import streamlit as st
from extractor import extract, FewDocumentsError
from summarizer import summarize
from translation import translate
import time
import cProfile

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch


@st.cache(allow_output_mutation=True)
def init():
    # Download required NLTK resources
    from nltk import download
    download('punkt')
    download('stopwords')

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Model for semantic search over candidate documents
    search_model = SentenceTransformer('msmarco-distilbert-base-v4', device=device)

    # Model and tokenizer for abstractive summarization
    summ_model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')
    tokenizer = AutoTokenizer.from_pretrained('t5-base')

    return search_model, summ_model, tokenizer


def main():
    search_model, summ_model, tokenizer = init()

    st.title("AutoSumm")
    st.subheader("Lucas Antunes & Matheus Vieira")

    portuguese = st.checkbox('Traduzir para o português.')

    if portuguese:
        st.subheader("Digite o tópico sobre o qual você deseja gerar um resumo")
        query_pt = st.text_input('Digite o tópico')  # topic is stored in this variable
        button = st.button('Gerar resumo')
    else:
        st.subheader("Type the desired topic to generate the summary")
        query = st.text_input('Type your topic')  # topic is stored in this variable
        button = st.button('Generate summary')

    if 'few_documents' not in st.session_state:
        st.session_state['few_documents'] = False
        few_documents = False
    else:
        few_documents = st.session_state['few_documents']

    if button:
        start_time = time.time()
        # Queries are always processed in English; translate first if the UI is in Portuguese
        query = translate(query_pt, 'pt', 'en') if portuguese else query

        try:
            with st.spinner('Extraindo textos relevantes...'):
                text = extract(query, search_model=search_model)
        except FewDocumentsError as e:
            # Not enough documents were retrieved: remember the partial results and the
            # (translated) query so the retry below can reuse them on the next rerun
            few_documents = True
            st.session_state['few_documents'] = True
            st.session_state['documents'] = e.documents
            st.session_state['msg'] = e.msg
            st.session_state['query'] = query
        else:
            st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')

            with st.spinner('Gerando resumo...'):
                summary = summarize(text, summ_model, tokenizer)

            st.info(f'(Total) Elapsed time: {time.time() - start_time:.2f}s')

            if portuguese:
                st.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
            else:
                st.markdown(f'Your summary for "{query}":\n\n> {summary}')

    if few_documents:
        st.warning(st.session_state['msg'])
        if st.button('Prosseguir'):
            start_time = time.time()
            # Reuse the query saved when FewDocumentsError was raised (the local variable
            # is not defined on this rerun in the Portuguese path)
            query = st.session_state['query']
            with st.spinner('Extraindo textos relevantes...'):
                text = extract(query, search_model=search_model, extracted_documents=st.session_state['documents'])

            st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')

            with st.spinner('Gerando resumo...'):
                summary = summarize(text, summ_model, tokenizer)

            st.info(f'(Total) Elapsed time: {time.time() - start_time:.2f}s')

            if portuguese:
                st.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
            else:
                st.markdown(f'Your summary for "{query}":\n\n> {summary}')

            st.session_state['few_documents'] = False
            few_documents = False


if __name__ == '__main__':
    cProfile.run('main()', 'stats.txt')