green committed on
Commit
53247c1
1 Parent(s): 03dd514

Update digestor.py

Browse files
Files changed (1) hide show
  1. digestor.py +2 -2
digestor.py CHANGED
@@ -159,10 +159,10 @@ class Digestor:
159
  # Finally, chunk the piece, adjusting the chunks if too long.
160
  for i, j in range_list:
161
  if (tokenized_len := len(tokenizer(chunk := ' '.join(fractured[i:j])))) <= self.token_limit: # d[i:j]).replace('\n',' ')))) <= self.token_limit:
162
- chunk_list.append(chunk.replace(' .','. '))
163
  else: # if chunks of <limit> words are too long, back them off.
164
  chunk_list.append(' '.join(chunk.split(' ')[: self.token_limit - tokenized_len ])) # tokenized_len ]).replace('\n',' '))
165
-
166
  return chunk_list
167
 
168
 
159
  # Finally, chunk the piece, adjusting the chunks if too long.
160
  for i, j in range_list:
161
  if (tokenized_len := len(tokenizer(chunk := ' '.join(fractured[i:j])))) <= self.token_limit: # d[i:j]).replace('\n',' ')))) <= self.token_limit:
162
+ chunk_list.append(chunk)
163
  else: # if chunks of <limit> words are too long, back them off.
164
  chunk_list.append(' '.join(chunk.split(' ')[: self.token_limit - tokenized_len ])) # tokenized_len ]).replace('\n',' '))
165
+ chunk_list = [i.replace(' . ','. ') for i in chunk_list]
166
  return chunk_list
167
 
168