Spaces:
Runtime error
Runtime error
Fixed nltk bug
Browse files
- summarizer.py +6 -1
summarizer.py
CHANGED
@@ -3,6 +3,7 @@ from transformers import BartTokenizer, TFBartForConditionalGeneration
|
|
3 |
from Utils import get_input_chunks
|
4 |
import networkx as nx
|
5 |
from nltk.tokenize import sent_tokenize
|
|
|
6 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
7 |
import community
|
8 |
from title_generator import T5Summarizer
|
@@ -47,7 +48,11 @@ class BARTSummarizer:
|
|
47 |
def preprocess_for_auto_chapters(self, text: str):
|
48 |
|
49 |
# Tokenize the text into sentences
|
50 |
-
|
|
|
|
|
|
|
|
|
51 |
|
52 |
# Filter out empty sentences and sentences with less than 5 words
|
53 |
sentences = [sentence for sentence in sentences if len(sentence.strip()) > 0 and len(sentence.split(" ")) > 4]
|
|
|
3 |
from Utils import get_input_chunks
|
4 |
import networkx as nx
|
5 |
from nltk.tokenize import sent_tokenize
|
6 |
+
import nltk
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
import community
|
9 |
from title_generator import T5Summarizer
|
|
|
48 |
def preprocess_for_auto_chapters(self, text: str):
|
49 |
|
50 |
# Tokenize the text into sentences
|
51 |
+
try:
|
52 |
+
sentences = sent_tokenize(text)
|
53 |
+
except:
|
54 |
+
nltk.download('punkt')
|
55 |
+
sentences = sent_tokenize(text)
|
56 |
|
57 |
# Filter out empty sentences and sentences with less than 5 words
|
58 |
sentences = [sentence for sentence in sentences if len(sentence.strip()) > 0 and len(sentence.split(" ")) > 4]
|