Update digestor.py
Browse files- digestor.py +2 -2
digestor.py
CHANGED
@@ -41,7 +41,7 @@ class Digestor:
     ]) = None

     # Summarization params:
-    token_limit: int =
+    token_limit: int = 1024
     word_limit: int = 400
     SUMMARIZATION_PARAMETERS = {
         "do_sample": False,
@@ -139,7 +139,7 @@ class Digestor:
         """Breaks articles into chunks that will fit the desired token length limit"""
         # Get approximate word count
         words = len(piece.split(' ')) # rough estimate of words. # words <= number tokens generally.
-        # get number of chunks by
+        # get number of chunks by dividing number of words by chunk size (word limit)
         # Create list of ints to create rangelist from
         base_range = [i*limit for i in range(words//limit+1)]
         # For articles less than limit in length base_range will only contain zero.
|