Update digestor.py
Browse files - digestor.py +2 -2
digestor.py
CHANGED
@@ -158,8 +158,8 @@ class Digestor:
|
|
158 |
|
159 |
# Finally, chunk the piece, adjusting the chunks if too long.
|
160 |
for i, j in range_list:
|
161 |
-
if (tokenized_len := len(tokenizer(chunk := ' '.join(fractured[i:j]).replace('\n',' ')))) <= self.token_limit:
|
162 |
-
chunk_list.append(chunk)
|
163 |
else: # if chunks of <limit> words are too long, back them off.
|
164 |
chunk_list.append(' '.join(chunk.split(' ')[: self.token_limit - tokenized_len ])) # tokenized_len ]).replace('\n',' '))
|
165 |
|
|
|
158 |
|
159 |
# Finally, chunk the piece, adjusting the chunks if too long.
|
160 |
for i, j in range_list:
|
161 |
+
if (tokenized_len := len(tokenizer(chunk := ' '.join(fractured[i:j])))) <= self.token_limit: # d[i:j]).replace('\n',' ')))) <= self.token_limit:
|
162 |
+
chunk_list.append(chunk.replace(' .','. '))
|
163 |
else: # if chunks of <limit> words are too long, back them off.
|
164 |
chunk_list.append(' '.join(chunk.split(' ')[: self.token_limit - tokenized_len ])) # tokenized_len ]).replace('\n',' '))
|
165 |
|