KevlarVK committed on
Commit
385c7ce
1 Parent(s): 9a4b6ed

Fixed nltk bug

Browse files
Files changed (1) hide show
  1. summarizer.py +6 -1
summarizer.py CHANGED
@@ -3,6 +3,7 @@ from transformers import BartTokenizer, TFBartForConditionalGeneration
3
  from Utils import get_input_chunks
4
  import networkx as nx
5
  from nltk.tokenize import sent_tokenize
 
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
  import community
8
  from title_generator import T5Summarizer
@@ -47,7 +48,11 @@ class BARTSummarizer:
47
  def preprocess_for_auto_chapters(self, text: str):
48
 
49
  # Tokenize the text into sentences
50
- sentences = sent_tokenize(text)
 
 
 
 
51
 
52
  # Filter out empty sentences and sentences with less than 5 words
53
  sentences = [sentence for sentence in sentences if len(sentence.strip()) > 0 and len(sentence.split(" ")) > 4]
 
3
  from Utils import get_input_chunks
4
  import networkx as nx
5
  from nltk.tokenize import sent_tokenize
6
+ import nltk
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  import community
9
  from title_generator import T5Summarizer
 
48
  def preprocess_for_auto_chapters(self, text: str):
49
 
50
  # Tokenize the text into sentences
51
+ try:
52
+ sentences = sent_tokenize(text)
53
+ except:
54
+ nltk.download('punkt')
55
+ sentences = sent_tokenize(text)
56
 
57
  # Filter out empty sentences and sentences with less than 5 words
58
  sentences = [sentence for sentence in sentences if len(sentence.strip()) > 0 and len(sentence.split(" ")) > 4]