mhsvieira committed on
Commit
2b3e58c
1 Parent(s): 30ce4c0

UI improvements

Browse files
Files changed (3) hide show
  1. AutoSumm.png +0 -0
  2. app.py +36 -7
  3. extractor/_utils.py +22 -5
AutoSumm.png ADDED
app.py CHANGED
@@ -29,23 +29,43 @@ def main():
29
  search_model, summ_model, tokenizer = init()
30
  Timer.reset()
31
 
32
- st.title("AutoSumm")
 
33
  st.subheader("Lucas Antunes & Matheus Vieira")
34
 
35
  portuguese = st.checkbox('Traduzir para o português.')
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  if portuguese:
38
  environ['PORTUGUESE'] = 'true' # work around (gambiarra)
39
- st.subheader("Digite o tópico sobre o qual você deseja gerar um resumo")
40
- query_pt = st.text_input('Digite o tópico') #text is stored in this variable
41
  button = st.button('Gerar resumo')
42
  else:
43
  environ['PORTUGUESE'] = 'false' # work around (gambiarra)
44
- st.subheader("Type the desired topic to generate the summary")
45
- query = st.text_input('Type your topic') #text is stored in this variable
46
  button = st.button('Generate summary')
47
 
48
- result = st.empty()
49
 
50
  if 'few_documents' not in st.session_state:
51
  st.session_state['few_documents'] = False
@@ -68,22 +88,31 @@ def main():
68
 
69
  if portuguese:
70
  result.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
 
 
71
  else:
72
  result.markdown(f'Your summary for "{query}":\n\n> {summary}')
 
 
73
 
74
  Timer.show_total()
75
 
76
 
77
  if few_documents:
78
  st.warning(st.session_state['msg'])
79
- if st.button('Prosseguir'):
 
80
  text = extract(query, search_model=search_model, extracted_documents=st.session_state['documents'])
81
  summary = summarize(text, summ_model, tokenizer)
82
 
83
  if portuguese:
84
  result.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
 
 
85
  else:
86
  result.markdown(f'Your summary for "{query}":\n\n> {summary}')
 
 
87
 
88
  st.session_state['few_documents'] = False
89
  few_documents = False
 
29
  search_model, summ_model, tokenizer = init()
30
  Timer.reset()
31
 
32
+ _, col2, _ = st.columns([1,1,1])
33
+ col2.image('AutoSumm.png', width=250)
34
  st.subheader("Lucas Antunes & Matheus Vieira")
35
 
36
  portuguese = st.checkbox('Traduzir para o português.')
37
 
38
+ st.sidebar.markdown("""
39
+ # Processing steps
40
+ #### Translation
41
+ Step where the system translates the user's query from Portuguese to English and the summary from English to Portuguese.
42
+
43
+ #### Corpus generation
44
+ Step where the system generates the complete corpus: query-related web pages and documents (PDFs and text files) on query-related knowledge area. The Corpus for this model was built to gather documents related to the Blue Amazon, a maritime region in South America.
45
+
46
+ #### Exhaustive search
47
+ Step where the system filters the texts of the corpus that contain keywords from the query.
48
+
49
+ #### Semantic search over documents
50
+ Step in which the system selects documents related to the query through semantic search.
51
+
52
+ #### Semantic search over paragraphs
53
+ Step in which the system breaks documents into paragraphs and selects those related to the query through semantic search.
54
+
55
+ #### Abstraction
56
+ Step in which the system generates an abstractive summary about the query from the best three paragraphs of the previous step.
57
+ """)
58
+
59
  if portuguese:
60
  environ['PORTUGUESE'] = 'true' # work around (gambiarra)
61
+ query_pt = st.text_input('Digite o tópico sobre o qual você deseja gerar um resumo') #text is stored in this variable
 
62
  button = st.button('Gerar resumo')
63
  else:
64
  environ['PORTUGUESE'] = 'false' # work around (gambiarra)
65
+ query = st.text_input('Type the desired topic to generate the summary') #text is stored in this variable
 
66
  button = st.button('Generate summary')
67
 
68
+ result = st.container()
69
 
70
  if 'few_documents' not in st.session_state:
71
  st.session_state['few_documents'] = False
 
88
 
89
  if portuguese:
90
  result.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
91
+ with result.expander(f'Parágrafos usados na geração do resumo'):
92
+ st.markdown(translate(text, "en", "pt").replace('\n', '\n\n'))
93
  else:
94
  result.markdown(f'Your summary for "{query}":\n\n> {summary}')
95
+ with result.expander(f'Paragraphs used in summarization'):
96
+ st.markdown(text.replace('\n', '\n\n'))
97
 
98
  Timer.show_total()
99
 
100
 
101
  if few_documents:
102
  st.warning(st.session_state['msg'])
103
+ msg = 'Prosseguir' if portuguese else 'Proceed'
104
+ if st.button(msg):
105
  text = extract(query, search_model=search_model, extracted_documents=st.session_state['documents'])
106
  summary = summarize(text, summ_model, tokenizer)
107
 
108
  if portuguese:
109
  result.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
110
+ with result.expander(f'Parágrafos usados na geração do resumo'):
111
+ st.markdown(translate(text, "en", "pt").replace('\n', '\n\n'))
112
  else:
113
  result.markdown(f'Your summary for "{query}":\n\n> {summary}')
114
+ with result.expander(f'Paragraphs used in summarization'):
115
+ st.markdown(text.replace('\n', '\n\n'))
116
 
117
  st.session_state['few_documents'] = False
118
  few_documents = False
extractor/_utils.py CHANGED
@@ -3,6 +3,7 @@ import numpy as np
3
  import streamlit as st
4
  # import inflect
5
  import torch
 
6
 
7
  # p = inflect.engine()
8
 
@@ -23,6 +24,13 @@ def document_extraction(dataset, query, keywords, min_document_size, min_just_on
23
  lower_query = query.lower()
24
  lower_keywords = [keyword.lower() for keyword in keywords]
25
 
 
 
 
 
 
 
 
26
  documents = {}
27
 
28
  documents['QUERY'] = [
@@ -61,7 +69,10 @@ def document_extraction(dataset, query, keywords, min_document_size, min_just_on
61
  if all(empty.values()):
62
  # TODO: throw error
63
  st.info(empty.values())
64
- st.warning(f'No document found for the query "{query}", please try with another query')
 
 
 
65
  st.stop()
66
 
67
  if sizes['QUERY'] >= 10:
@@ -72,10 +83,16 @@ def document_extraction(dataset, query, keywords, min_document_size, min_just_on
72
  extracted_documents = documents['OR']
73
  else:
74
  number_of_documents = sizes['OR']
75
- raise FewDocumentsError(documents['OR'], number_of_documents,
76
- f'Only {number_of_documents} documents found for the query "{query}"\n\
77
- Please select continue to proceed with {number_of_documents} documents or try again with another query'
78
- )
 
 
 
 
 
 
79
 
80
  return extracted_documents, empty, sizes
81
 
 
3
  import streamlit as st
4
  # import inflect
5
  import torch
6
+ from os import environ
7
 
8
  # p = inflect.engine()
9
 
 
24
  lower_query = query.lower()
25
  lower_keywords = [keyword.lower() for keyword in keywords]
26
 
27
+ if environ['PORTUGUESE'] == 'true':
28
+ portuguese = True
29
+ elif environ['PORTUGUESE'] == 'false':
30
+ portuguese = False
31
+ else:
32
+ raise EnvironmentError
33
+
34
  documents = {}
35
 
36
  documents['QUERY'] = [
 
69
  if all(empty.values()):
70
  # TODO: throw error
71
  st.info(empty.values())
72
+ if portuguese:
73
+ st.warning(f'Nenhum documento encontrado para a query "{query}", por favor, tente com outra query')
74
+ else:
75
+ st.warning(f'No document found for the query "{query}", please try with another query')
76
  st.stop()
77
 
78
  if sizes['QUERY'] >= 10:
 
83
  extracted_documents = documents['OR']
84
  else:
85
  number_of_documents = sizes['OR']
86
+ if portuguese:
87
+ raise FewDocumentsError(documents['OR'], number_of_documents,
88
+ f'Somente {number_of_documents} documentos encontrados para a query "{query}"\n\
89
+ Por favor selecione "Prosseguir" para prosseguir com {number_of_documents} documentos ou tente novamente com outra query'
90
+ )
91
+ else:
92
+ raise FewDocumentsError(documents['OR'], number_of_documents,
93
+ f'Only {number_of_documents} documents found for the query "{query}"\n\
94
+ Please select "Proceed" to proceed with {number_of_documents} documents or try again with another query'
95
+ )
96
 
97
  return extracted_documents, empty, sizes
98