m. polinsky committed on
Commit
a8c2197
1 Parent(s): b8fc76c

Update digestor.py

Browse files
Files changed (1) hide show
  1. digestor.py +3 -43
digestor.py CHANGED
@@ -174,6 +174,7 @@ class Digestor:
174
  # API CALLS: consider placing the code from query() into here. * * * *
175
  for chunk in chunklist:
176
  safe = False
 
177
  with Timer(name=f"{stubhead}_query_time", logger=None):
178
  while not safe and repeat < 4:
179
  try: # make these digest params.
@@ -190,7 +191,8 @@ class Digestor:
190
  print("Summarization error, repeating...")
191
  print(e)
192
  repeat+=1
193
- collection_bin.append(summarized_chunk)
 
194
  return collection_bin
195
 
196
 
@@ -207,46 +209,4 @@ class Digestor:
207
  for each in self.summaries:
208
  digest.append(' '.join(each.summary_text))
209
 
210
- # Create dict to write out digest data for analysis
211
- out_data = {}
212
- datetime_str = f"""{dt.now()}"""
213
- choices_str = ', '.join(self.user_choices)
214
- digest_str = '\n\n'.join(digest)
215
-
216
-
217
- # This is a long comprehension to store all the fields and values in each summary.
218
- # integer: {
219
- # name_of_field:value except for source,
220
- # which is unhashable so needs explicit handling.
221
- # }
222
- summaries = { # k is a summary tuple, i,p = enumerate(k)
223
- # Here we take the first dozen words of the first summary chunk as key
224
- c: {
225
- # field name : value unless its the source
226
- k._fields[i]:p if k._fields[i]!='source'
227
- else
228
- {
229
- 'name': k.source.source_name,
230
- 'source_url': k.source.source_url,
231
- 'Summarization" Checkpoint': k.source.source_summarization_checkpoint,
232
- 'NER Checkpoint': k.source.source_ner_checkpoint,
233
- } for i,p in enumerate(k)
234
- } for c,k in enumerate(self.summaries)}
235
-
236
- out_data['timestamp'] = datetime_str
237
- out_data['choices'] = choices_str
238
- out_data['digest_text'] = digest_str
239
- out_data['article_count'] = len(self.summaries)
240
- out_data['digest_length'] = len(digest_str.split(" "))
241
- out_data['digest_time'] = self.timer.timers['digest_time']
242
- out_data['sum_params'] = {
243
- 'token_limit':self.token_limit,
244
- 'word_limit':self.word_limit,
245
- 'params':self.SUMMARIZATION_PARAMETERS,
246
- }
247
- out_data['summaries'] = summaries
248
-
249
-
250
  self.text = digest_str
251
-
252
- return out_data
174
  # API CALLS: consider placing the code from query() into here. * * * *
175
  for chunk in chunklist:
176
  safe = False
177
+ summarized_chunk = None
178
  with Timer(name=f"{stubhead}_query_time", logger=None):
179
  while not safe and repeat < 4:
180
  try: # make these digest params.
191
  print("Summarization error, repeating...")
192
  print(e)
193
  repeat+=1
194
+ if summarized_chunk is not None:
195
+ collection_bin.append(summarized_chunk)
196
  return collection_bin
197
 
198
 
209
  for each in self.summaries:
210
  digest.append(' '.join(each.summary_text))
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  self.text = digest_str