theekshana committed on
Commit
c293aab
1 Parent(s): a4f8505
Files changed (3) hide show
  1. app.py +159 -235
  2. config.py +4 -3
  3. summarizer.py +54 -53
app.py CHANGED
@@ -1,9 +1,11 @@
1
  import datetime
 
 
2
  import logging
3
  import nltk
4
  import validators
5
  import streamlit as st
6
- from summarizer import Summarizer
7
  from config import MODELS
8
  from warnings import filterwarnings
9
 
@@ -15,91 +17,141 @@ from utils import (
15
  read_text_from_file,
16
  )
17
 
 
 
18
 
19
- from rouge import Rouge
20
-
21
- # def filer():
22
- # # return "logs/log "
23
- # today = datetime.datetime.today()
24
- # log_filename = f"logs/{today.year}-{today.month:02d}-{today.day:02d}.log"
25
- # return log_filename
26
-
27
- # file_handler = logging.FileHandler(filer())
28
- # # file_handler = logging.handlers.TimedRotatingFileHandler(filer(),when="D")
29
- # file_handler.setLevel(logging.INFO)
30
-
31
- # logging.basicConfig(
32
- # level=logging.DEBUG,
33
- # format="%(asctime)s %(levelname)s (%(name)s) : %(message)s",
34
- # datefmt="%Y-%m-%d %H:%M:%S",
35
- # handlers=[file_handler],
36
- # force=True,
37
- # )
38
 
39
  logger = logging.getLogger(__name__)
40
 
41
-
42
- if "api_key" not in st.session_state:
43
- st.session_state.api_key = " "
44
-
45
-
46
- @st.cache_resource
47
  def initialize_app():
48
  nltk.download("punkt")
 
 
 
 
 
 
 
 
49
 
50
- @st.cache_resource
51
- def init_summarizer(model_name,api_key=None):
 
52
 
53
- model_type = "local"
54
- if model_name == "OpenAI":
55
- model_type = "openai"
56
 
57
- model_path = MODELS[model_name]
58
- if model_type == "openai":
59
- #validation logic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- return Summarizer(model_path,model_type,api_key)
62
- else:
63
- logger.info(f"Model for summarization : {model_path}")
64
- return Summarizer(model_path, model_type)
65
 
66
  def load_app():
67
  st.title("Text Summarizer 📝")
68
 
69
- # st.markdown("Creator: [Atharva Ingle](https://github.com/Gladiator07)")
70
- # st.markdown(
71
- # "Source code: [GitHub Repository](https://github.com/Gladiator07/Text-Summarizer)"
72
- # )
73
- model_name = st.sidebar.selectbox(
74
- "Model Name", options=["Version 0", "Version 1","OpenAI"]
75
- )
76
- if model_name == "OpenAI":
77
- st.sidebar.text_input("Enter a valid OpenAI API Key",key = "api_key" ,type="password")
78
-
79
- summarizer_type = st.sidebar.selectbox(
80
- "Summarizer Type for Long Text", options=["Map Reduce", "Refine"]
81
- )
82
-
83
- st.markdown(
84
- "Enter a text or a url to get a concise summary of the article while conserving the overall meaning. This app supports text in the following formats:"
85
- )
86
- st.markdown(
87
- """- Raw text in text box
88
- - URL of article/news to be summarized
89
- - .txt, .pdf, .docx file formats"""
90
- )
91
- st.markdown(
92
- """This app supports abstractive summarization of documents:
93
-
94
- **Abstractive Summarization**: The abstractive approach involves rephrasing the complete document while capturing the complete meaning of the document. This type of summarization provides more human-like summary"""
95
- )
96
- st.markdown("---")
97
- # ---------------------------
98
-
99
- # ---------------------------
100
  inp_text = st.text_input("Enter text or a url here")
101
  st.markdown(
102
- "<h3 style='text-align: center; color: green;'>OR</h3>",
103
  unsafe_allow_html=True,
104
  )
105
  uploaded_file = st.file_uploader(
@@ -125,51 +177,44 @@ def load_app():
125
  st.write(cleaned_txt[0])
126
  else:
127
  st.write(cleaned_txt)
128
- summarize = st.button("Summarize")
129
-
130
- if is_url:
131
- text_to_summarize = " ".join([txt for txt in cleaned_txt])
132
- else:
133
- text_to_summarize = cleaned_txt
134
-
135
- return text_to_summarize, model_name, summarizer_type, summarize
136
-
137
-
138
-
139
-
140
- def get_summary(text_to_summarize,model_name, summarizer_type, summarize):
141
-
142
- while not summarize:
143
- continue
144
-
145
- else:
146
 
147
- logger.info(f"Model Name: {model_name}")
148
- logger.info(f"Summarization Type for Long Text: {summarizer_type}")
149
 
150
- api_key = st.session_state.api_key
 
 
 
 
151
 
 
152
 
153
- summarizer = init_summarizer(model_name,api_key)
 
 
154
 
155
 
156
- with st.spinner(
157
- text="Creating summary. This might take a few seconds ..."
158
- ):
 
159
 
160
- if summarizer_type == "Refine":
161
- summarized_text, time = summarizer.summarize(text_to_summarize,"refine")
162
- return summarized_text, time
163
- else :
164
- summarized_text, time = summarizer.summarize(text_to_summarize,"map_reduce")
165
- return summarized_text, time
166
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
-
169
 
170
  def display_output(summarized_text,time):
171
-
172
-
173
  logger.info(f"SUMMARY: {summarized_text}")
174
  logger.info(f"Summary took {time}s")
175
  st.subheader("Summarized text")
@@ -177,140 +222,19 @@ def display_output(summarized_text,time):
177
  st.info(f"Time: {time}s")
178
 
179
 
180
- # def summarizer_app():
181
- # # ---------------------------------
182
- # # Main Application
183
- # # ---------------------------------
184
- # st.title("Text Summarizer 📝")
185
-
186
- # # st.markdown("Creator: [Atharva Ingle](https://github.com/Gladiator07)")
187
- # # st.markdown(
188
- # # "Source code: [GitHub Repository](https://github.com/Gladiator07/Text-Summarizer)"
189
- # # )
190
- # model_name = st.sidebar.selectbox(
191
- # "Model Name", options=["Version 0", "Version 1","OpenAI"]
192
- # )
193
- # if model_name == "OpenAI":
194
- # st.sidebar.text_input("Enter a valid OpenAI API Key",key = "api_key" ,type="password")
195
-
196
- # summarizer_type = st.sidebar.selectbox(
197
- # "Summarizer Type for Long Text", options=["Map Reduce", "Refine"]
198
- # )
199
-
200
- # st.markdown(
201
- # "Enter a text or a url to get a concise summary of the article while conserving the overall meaning. This app supports text in the following formats:"
202
- # )
203
- # st.markdown(
204
- # """- Raw text in text box
205
- # - URL of article/news to be summarized
206
- # - .txt, .pdf, .docx file formats"""
207
- # )
208
- # st.markdown(
209
- # """This app supports two type of summarization:
210
-
211
- # 1. **Extractive Summarization**: The extractive approach involves picking up the most important phrases and lines from the documents. It then combines all the important lines to create the summary. So, in this case, every line and word of the summary actually belongs to the original document which is summarized.
212
- # 2. **Abstractive Summarization**: The abstractive approach involves rephrasing the complete document while capturing the complete meaning of the document. This type of summarization provides more human-like summary"""
213
- # )
214
- # st.markdown("---")
215
- # # ---------------------------
216
- # # SETUP & Constants
217
- # # nltk.download("punkt")
218
- # # abs_tokenizer_name = "facebook/bart-large-cnn"
219
- # # abs_model_name = "facebook/bart-large-cnn"
220
- # # abs_tokenizer = AutoTokenizer.from_pretrained(abs_tokenizer_name)
221
- # # abs_max_length = 90
222
- # # abs_min_length = 30
223
-
224
- # # model_name_v0 = "IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v0"
225
- # # model_name_v1 = "IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v1"
226
- # # ---------------------------
227
- # inp_text = st.text_input("Enter text or a url here")
228
- # st.markdown(
229
- # "<h3 style='text-align: center; color: green;'>OR</h3>",
230
- # unsafe_allow_html=True,
231
- # )
232
- # uploaded_file = st.file_uploader(
233
- # "Upload a .txt, .pdf, .docx file for summarization"
234
- # )
235
-
236
- # is_url = validators.url(inp_text)
237
- # if is_url:
238
- # # complete text, chunks to summarize (list of sentences for long docs)
239
- # logger.info("Text Input Type: URL")
240
- # text, cleaned_txt = fetch_article_text(url=inp_text)
241
- # elif uploaded_file:
242
- # logger.info("Text Input Type: FILE")
243
- # cleaned_txt = read_text_from_file(uploaded_file)
244
- # cleaned_txt = clean_text(cleaned_txt)
245
- # else:
246
- # logger.info("Text Input Type: INPUT TEXT")
247
- # cleaned_txt = clean_text(inp_text)
248
-
249
- # # view summarized text (expander)
250
- # with st.expander("View input text"):
251
- # if is_url:
252
- # st.write(cleaned_txt[0])
253
- # else:
254
- # st.write(cleaned_txt)
255
- # summarize = st.button("Summarize")
256
-
257
- # # called on toggle button [summarize]
258
- # if summarize:
259
- # if is_url:
260
- # text_to_summarize = " ".join([txt for txt in cleaned_txt])
261
- # else:
262
- # text_to_summarize = cleaned_txt
263
-
264
- # logger.info(f"Model Name: {model_name}")
265
- # logger.info(f"Summarization Type for Long Text: {summarizer_type}")
266
-
267
- # api_key = st.session_state.api_key
268
-
269
- # print(api_key)
270
-
271
- # summarizer = init_summarizer(model_name,api_key)
272
-
273
- # with st.spinner(
274
- # text="Creating summary. This might take a few seconds ..."
275
- # ):
276
- # #ext_model = Summarizer()
277
- # #summarized_text = ext_model(text_to_summarize, num_sentences=5)
278
-
279
- # if summarizer_type == "Refine":
280
- # summarized_text, time = summarizer.summarize(text_to_summarize,"refine")
281
- # else :
282
- # summarized_text, time = summarizer.summarize(text_to_summarize,"map_reduce")
283
-
284
-
285
- # # elif model_name == "Version 1":
286
- # # with st.spinner(
287
- # # text="Creating summary. This might take a few seconds ..."
288
- # # ):
289
- # # if summarizer_type == "Refine":
290
- # # summarized_text, time = summarizer_v1.summarize(text_to_summarize,"refine")
291
- # # else :
292
- # # summarized_text, time = summarizer_v1.summarize(text_to_summarize,"map_reduce")
293
-
294
- # # final summarized output
295
-
296
- # logger.info(f"SUMMARY: {summarized_text}")
297
- # logger.info(f"Summary took {time}s")
298
- # st.subheader("Summarized text")
299
- # st.info(f"{summarized_text}")
300
- # st.info(f"Time: {time}s")
301
-
302
- # # st.subheader("Rogue Scores")
303
- # # rouge_sc = Rouge()
304
- # # ground_truth = cleaned_txt[0] if is_url else cleaned_txt
305
- # # score = rouge_sc.get_scores(summarized_text, ground_truth, avg=True)
306
- # # st.code(score)
307
 
308
 
309
  if __name__ == "__main__":
310
- initialize_app()
311
- text_to_summarize, model_name, summarizer_type, summarize = load_app()
312
- summarized_text,time = get_summary(text_to_summarize, model_name, summarizer_type, summarize)
313
- display_output(summarized_text,time)
314
 
315
 
316
 
 
1
  import datetime
2
+ import os
3
+ import time
4
  import logging
5
  import nltk
6
  import validators
7
  import streamlit as st
8
+ from summarizer import summarizer_init, summarizer_summarize
9
  from config import MODELS
10
  from warnings import filterwarnings
11
 
 
17
  read_text_from_file,
18
  )
19
 
20
+ # summarizer = None
21
+ # from rouge import Rouge
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  logger = logging.getLogger(__name__)
25
 
 
 
 
 
 
 
26
def initialize_app():
    """Make sure the NLTK tokenizer data exists and seed the session state.

    Streamlit re-executes the script on every user interaction, so this
    function runs on each rerun and has to be cheap and idempotent.
    """
    # Only download when the punkt data is not installed locally; an
    # unconditional nltk.download() is attempted again on every rerun.
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")

    session_defaults = {
        "model_type": "local",
        "model_name": "long-t5 v1",
        "summarizer_type": "Map Reduce",
        "is_parameters_changed": False,
        "openai_api_key": "",
    }

    # Never overwrite a value the user has already set during this session.
    for key, value in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
 
 
42
 
43
@st.cache_resource
def _load_summarizer(model_path, model_type, api_key=None):
    """Build the tokenizer and base summarizer for one exact configuration.

    Every argument is part of the Streamlit resource-cache key, so a new
    API key or a different model produces a fresh object instead of
    silently reusing one built with stale settings.
    """
    return summarizer_init(model_path, model_type, api_key)


def init_summarizer(model_name, api_key=None):
    """Return ``(model_type, tokenizer, base_summarizer)`` for *model_name*.

    Args:
        model_name: A key of ``MODELS``.
        api_key: OpenAI API key. When omitted for the OpenAI model, the key
            stored in ``st.session_state.openai_api_key`` is used.

    Raises:
        KeyError: If *model_name* is not a key of ``MODELS``.
    """
    # Compare case-insensitively so both "openai" and "OpenAI" select the
    # OpenAI backend.
    model_type = "openai" if model_name.lower() == "openai" else "local"
    model_path = MODELS[model_name]

    if model_type == "openai":
        # validation logic
        api_key = api_key or st.session_state.openai_api_key
    else:
        # Local models never receive a key; keeping it out of the cache key
        # avoids reloading the model when the stored key changes.
        api_key = None
        logger.info(f"Model for summarization : {model_path}")

    # The spinner is the only progress feedback here: a success banner held
    # open with time.sleep() would block the script thread on every call.
    with st.spinner(
        text="initialising the summarizer. This might take a few seconds ..."
    ):
        tokenizer, base_summarizer = _load_summarizer(model_path, model_type, api_key)

    return model_type, tokenizer, base_summarizer
65
+
66
def update_parameters_change():
    """Record in the session state that the sidebar parameters were edited."""
    st.session_state["is_parameters_changed"] = True
68
+
69
+
70
def parameters_change_button(model_name, summarizer_type):
    """Store the submitted parameters in the session and flash a confirmation.

    Args:
        model_name: Model selected in the parameter form.
        summarizer_type: Long-text strategy selected in the parameter form.
    """
    submitted_state = (
        ("model_name", model_name),
        ("summarizer_type", summarizer_type),
        ("is_parameters_changed", False),
    )
    for key, value in submitted_state:
        st.session_state[key] = value

    banner = st.success("chat parameters updated")
    time.sleep(2)  # keep the confirmation visible for two seconds
    banner.empty()  # then remove it from the page
78
+
79
import re


def is_valid_open_ai_api_key(secretKey):
    """Return True when *secretKey* has the shape of an OpenAI secret key.

    This is a format check only (``sk-`` followed by at least 32 key
    characters); it does not contact OpenAI to verify the key.

    Args:
        secretKey: Candidate key. Non-string values are rejected.

    Returns:
        bool: True if the whole string matches the expected format.
    """
    if not isinstance(secretKey, str):
        return False
    # fullmatch rejects a trailing newline, which ``$`` with re.search lets
    # through. "-" and "_" are allowed so prefixed keys such as "sk-proj-..."
    # pass as well.
    return re.fullmatch(r"sk-[A-Za-z0-9_-]{32,}", secretKey) is not None
84
+
85
def side_bar():
    """Render the sidebar: parameter form, optional API-key form, and help.

    Reads and writes ``st.session_state`` (``model_name``,
    ``openai_api_key``) and delegates saving of the model parameters to
    ``parameters_change_button``.
    """
    # NOTE(review): block nesting was reconstructed from a view that had lost
    # its indentation - confirm the widget placement against the real file.
    with st.sidebar:
        st.subheader("Model parameters")

        with st.form('param_form'):
            # Iterating MODELS yields its keys, which become the options.
            model_name = st.selectbox(
                "Chat model",
                MODELS,
                key="Model Name",
                help="Select the LLM model for summarization",
            )

            summarizer_type = st.selectbox(
                "Summarizer Type for Long Text", options=["Map Reduce", "Refine"]
            )

            submitted = st.form_submit_button(
                "Save Parameters",
            )

            if submitted:
                parameters_change_button(model_name, summarizer_type)

        st.markdown("\n")
        # The key form is only offered once the saved model is the OpenAI one.
        if st.session_state.model_name == 'openai':
            with st.form('openai api key'):
                api_key = st.text_input(
                    "Enter openai api key",
                    type="password",
                    value=st.session_state.openai_api_key,
                    help="enter an openai api key created from 'https://platform.openai.com/account/api-keys'",
                )

                submit_key = st.form_submit_button(
                    "Save key",
                )

                if submit_key:
                    # Pasted keys often carry stray whitespace or a newline.
                    st.session_state.openai_api_key = api_key.strip()
                    alert = st.success("openai api key updated")
                    time.sleep(1)  # keep the confirmation visible for one second
                    alert.empty()  # then clear it

        st.markdown(
            "### How to use\n"
            "1. Select the LLM model\n"  # noqa: E501
            "1. If selected model asks for a api key enter a valid api key.\n"  # noqa: E501
            "1. Enter a text or a url to get a summary."
        )
        st.markdown("---")
        st.markdown("""
        This app supports text in the following formats:
        - Raw text in text box
        - URL of article/news to be summarized
        - .txt, .pdf, .docx file formats
        """)
147
 
 
 
 
 
148
 
149
  def load_app():
150
  st.title("Text Summarizer 📝")
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  inp_text = st.text_input("Enter text or a url here")
153
  st.markdown(
154
+ "<h4 style='text-align: center; color: green;'>OR</h4>",
155
  unsafe_allow_html=True,
156
  )
157
  uploaded_file = st.file_uploader(
 
177
  st.write(cleaned_txt[0])
178
  else:
179
  st.write(cleaned_txt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
+ submitted = st.button("Summarize")
 
182
 
183
+ if submitted:
184
+ if is_url:
185
+ text_to_summarize = " ".join([txt for txt in cleaned_txt])
186
+ else:
187
+ text_to_summarize = cleaned_txt
188
 
189
+ submit_text_to_summarize(text_to_summarize)
190
 
191
def submit_text_to_summarize(text_to_summarize):
    """Summarize *text_to_summarize* and render the summary with its timing."""
    result = get_summary(text_to_summarize)
    summary, elapsed = result
    display_output(summary, elapsed)
194
 
195
 
196
def get_summary(text_to_summarize):
    """Summarize *text_to_summarize* with the model chosen in the session.

    Args:
        text_to_summarize: Cleaned input text.

    Returns:
        The ``(summarized_text, elapsed)`` pair returned by
        ``summarizer_summarize``.
    """
    model_name = st.session_state.model_name
    summarizer_type = st.session_state.summarizer_type

    # Hand the stored key over explicitly for the OpenAI model so that it is
    # an argument of the cached init_summarizer call; a changed key then
    # yields a new summarizer instead of a cached one built with the old key.
    api_key = None
    if model_name.lower() == "openai":
        api_key = st.session_state.get("openai_api_key") or None
    model_type, tokenizer, base_summarizer = init_summarizer(model_name, api_key=api_key)

    logger.info(f"Model Name: {model_name}")
    logger.info(f"Summarization Type for Long Text: {summarizer_type}")

    # Anything other than "Refine" falls back to map-reduce.
    chain_type = "refine" if summarizer_type == "Refine" else "map_reduce"

    with st.spinner(
        text="Creating summary. This might take a few seconds ..."
    ):
        summarized_text, elapsed = summarizer_summarize(
            model_type,
            tokenizer,
            base_summarizer,
            text_to_summarize,
            summarizer_type=chain_type,
        )
    return summarized_text, elapsed
215
 
 
216
 
217
  def display_output(summarized_text,time):
 
 
218
  logger.info(f"SUMMARY: {summarized_text}")
219
  logger.info(f"Summary took {time}s")
220
  st.subheader("Summarized text")
 
222
  st.info(f"Time: {time}s")
223
 
224
 
225
def main():
    """Entry point: seed the session, draw the sidebar, then the main page."""
    for step in (initialize_app, side_bar, load_app):
        step()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
 
233
  if __name__ == "__main__":
234
+ main()
235
+ # text_to_summarize, model_name, summarizer_type, summarize = load_app()
236
+ # summarized_text,time = get_summary(text_to_summarize, model_name, summarizer_type, summarize)
237
+ # display_output(summarized_text,time)
238
 
239
 
240
 
config.py CHANGED
@@ -1,5 +1,6 @@
1
  MODELS = {
2
- "Version 0":"IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v0",
3
- "Version 1":"IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v1",
4
- "OpenAI" : "IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v1" #for tokenizer
 
5
  }
 
1
# Hugging Face repository shared by the "pegasus-x-large v1" and "openai" entries.
_PEGASUS_X_LARGE_V1 = "IronOne-AI-Labs/pegasus-x-large-annual-report-QLoRA-fine-tuned-v1.1"

# Display name shown in the UI -> Hugging Face model path.
MODELS = {
    "long-t5 v0": "IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v0",
    "long-t5 v1": "IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v1",
    "pegasus-x-large v1": _PEGASUS_X_LARGE_V1,
    # presumably only used to load a tokenizer for the OpenAI backend - confirm
    "openai": _PEGASUS_X_LARGE_V1,
}
summarizer.py CHANGED
@@ -8,65 +8,66 @@ from logging import getLogger
8
  import time
9
 
10
  logger = getLogger(__name__)
11
- class Summarizer:
12
 
13
 
14
- def __init__(self,model_name,model_type,api_key=None) -> None:
15
- self.model_type = model_type
16
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
17
- self.base_summarizer = get_model(model_type,model_name,api_key)
18
 
19
- def summarize(self,text:str,summarizer_type = "map_reduce")->str:
 
 
 
 
20
 
21
- text_to_summarize,length_type = prepare_for_summarize(text,self.tokenizer)
22
 
23
- if length_type =="short":
24
 
25
- logger.info("Processing Input Text less than 12000 Tokens")
26
- if self.model_type=="openai":
27
- llm = self.base_summarizer
28
- prompt = PromptTemplate.from_template(
29
- template="""Write a concise and complete summary in bullet points of the given annual report.
30
- Important:
31
- * Note that the summary should contain all important information and it should not contain any unwanted information.
32
- * Make sure to keep the summary as short as possible. And Summary should be in bullet points. Seperate each point with a new line.
33
- TEXT: {text}
34
- SUMMARY:"""
35
- )
36
- llm_chain = prompt|llm
37
- start = time.time()
38
- summary = llm_chain.invoke({"text": text_to_summarize})
39
- end = time.time()
40
- print(f"Summary generation took {round((end-start),2)}s.")
41
- return summary,round((end-start),2)
42
-
43
- elif self.model_type == "local":
44
- pipe = self.base_summarizer
45
- start = time.time()
46
- summary = pipe(text_to_summarize)[0]['summary_text']
47
- end = time.time()
48
- print(f"Summary generation took {round((end-start),2)}s.")
49
- return summary,round((end-start),2)
50
- else:
51
- if summarizer_type == "refine":
52
- print("The text is too long, Running Refine Summarizer")
53
- llm_chain = get_refine_chain(self.base_summarizer,self.model_type)
54
- logger.info("Running Refine Chain for Summarization")
55
- start = time.time()
56
- summary = llm_chain.invoke({"input_documents": text_to_summarize}, return_only_outputs=True)['output_text']
57
- end = time.time()
58
- print(f"Summary generation took {round((end-start),2)}s.")
59
- return summary,round((end-start),2)
 
 
60
 
61
 
62
- else:
63
- print("The text is too long, Running Map Reduce Summarizer")
64
-
65
- llm_chain = get_map_reduce_chain(self.base_summarizer,model_type=self.model_type)
66
- logger.info("Running Map Reduce Chain for Summarization")
67
- start = time.time()
68
- summary = llm_chain.invoke({"input_documents": text_to_summarize}, return_only_outputs=True)['output_text']
69
- end = time.time()
70
- print(f"Summary generation took {round((end-start),2)}s.")
71
- return summary,round((end-start),2)
72
 
 
 
 
 
 
 
 
 
 
8
  import time
9
 
10
  logger = getLogger(__name__)
 
11
 
12
 
 
 
 
 
13
 
14
def summarizer_init(model_name, model_type, api_key=None) -> tuple:
    """Load the tokenizer and the base summarizer for a model.

    Args:
        model_name: Model path handed to ``AutoTokenizer.from_pretrained``
            and to ``get_model``.
        model_type: Backend selector forwarded to ``get_model``.
        api_key: Optional API key forwarded to ``get_model``.

    Returns:
        tuple: ``(tokenizer, base_summarizer)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    base_summarizer = get_model(model_type, model_name, api_key)
    return tokenizer, base_summarizer
19
 
20
def _run_timed(produce_summary):
    """Call *produce_summary*, print the duration, return ``(result, seconds)``."""
    start = time.time()
    summary = produce_summary()
    elapsed = round(time.time() - start, 2)
    print(f"Summary generation took {elapsed}s.")
    return summary, elapsed


def summarizer_summarize(model_type, tokenizer, base_summarizer, text: str, summarizer_type="map_reduce") -> tuple:
    """Summarize *text* and report how long the generation took.

    Short inputs (as classified by ``prepare_for_summarize``) are summarized
    in one call; longer inputs go through a refine or map-reduce chain.

    Args:
        model_type: ``"openai"`` or ``"local"``.
        tokenizer: Tokenizer passed to ``prepare_for_summarize``.
        base_summarizer: The LLM (openai) or summarization pipeline (local).
        text: Text to summarize.
        summarizer_type: ``"refine"`` selects the refine chain for long
            inputs; any other value selects map-reduce.

    Returns:
        tuple: ``(summary, elapsed_seconds)`` with the time rounded to 2 dp.

    Raises:
        ValueError: If a short input is given with an unknown *model_type*.
    """
    text_to_summarize, length_type = prepare_for_summarize(text, tokenizer)

    if length_type == "short":
        logger.info("Processing Input Text less than 12000 Tokens")

        if model_type == "openai":
            # NOTE(review): the indentation inside this template could not be
            # recovered from the view this was reconstructed from - confirm.
            prompt = PromptTemplate.from_template(
                template="""Write a concise and complete summary in bullet points of the given annual report.
                Important:
                * Note that the summary should contain all important information and it should not contain any unwanted information.
                * Make sure to keep the summary as short as possible. And Summary should be in bullet points. Seperate each point with a new line.
                TEXT: {text}
                SUMMARY:"""
            )
            llm_chain = prompt | base_summarizer
            return _run_timed(lambda: llm_chain.invoke({"text": text_to_summarize}))

        if model_type == "local":
            return _run_timed(
                lambda: base_summarizer(text_to_summarize)[0]['summary_text']
            )

        # Falling through would return None and make the caller's tuple
        # unpacking fail with an unrelated TypeError.
        raise ValueError(f"Unsupported model_type: {model_type!r}")

    if summarizer_type == "refine":
        print("The text is too long, Running Refine Summarizer")
        llm_chain = get_refine_chain(base_summarizer, model_type)
        logger.info("Running Refine Chain for Summarization")
    else:
        print("The text is too long, Running Map Reduce Summarizer")
        llm_chain = get_map_reduce_chain(base_summarizer, model_type=model_type)
        logger.info("Running Map Reduce Chain for Summarization")

    return _run_timed(
        lambda: llm_chain.invoke(
            {"input_documents": text_to_summarize}, return_only_outputs=True
        )['output_text']
    )
73
+