# distilbart-tos-summarizer-tosdr / test_summarizer.py
# Author: sdhanabal1
# Commit 4f3c9ea: Tune length parameters so that token size doesn't exceed 1024, which is the model limit
from Summarizer import Summarizer
def test_split_sentences_by_token_length():
    """Check that sentences are packed into chunks whose token count stays under the split limit.

    Each case pairs a ``split_token_length`` with the grouping expected from
    ``Summarizer.split_sentences_by_token_length`` for the same three input sentences.
    """
    sentences = [
        'Python is a programming language.',
        'Memory allocation.',
        'Free.'
    ]
    # (split_token_length, expected chunk list) — limits 3 and 5 yield the same
    # grouping because the first sentence alone already exceeds both limits.
    cases = [
        (3, ['Python is a programming language.',
             'Memory allocation. Free.']),
        (5, ['Python is a programming language.',
             'Memory allocation. Free.']),
        (7, ['Python is a programming language. Memory allocation.',
             'Free.']),
        (10, ['Python is a programming language. Memory allocation. Free.']),
    ]
    for limit, expected in cases:
        result = Summarizer.split_sentences_by_token_length(sentences, split_token_length=limit)
        assert result == expected