Spaces:

KevlarVK
/

content_summarizer

Runtime error

KevlarVK committed on Mar 31, 2023

Commit

385c7ce

1 Parent(s): 9a4b6ed

Fixed nltk bug

Files changed (1) hide show

summarizer.py CHANGED Viewed

@@ -3,6 +3,7 @@ from transformers import BartTokenizer, TFBartForConditionalGeneration
 from Utils import get_input_chunks
 import networkx as nx
 from nltk.tokenize import sent_tokenize
 from sklearn.feature_extraction.text import TfidfVectorizer
 import community
 from title_generator import T5Summarizer
@@ -47,7 +48,11 @@ class BARTSummarizer:
     def preprocess_for_auto_chapters(self, text: str):
         # Tokenize the text into sentences
-        sentences = sent_tokenize(text)
         # Filter out empty sentences and sentences with less than 5 words
         sentences = [sentence for sentence in sentences if len(sentence.strip()) > 0 and len(sentence.split(" ")) > 4]

 from Utils import get_input_chunks
 import networkx as nx
 from nltk.tokenize import sent_tokenize
+import nltk
 from sklearn.feature_extraction.text import TfidfVectorizer
 import community
 from title_generator import T5Summarizer
     def preprocess_for_auto_chapters(self, text: str):
         # Tokenize the text into sentences
+        try:
+            sentences = sent_tokenize(text)
+        except:
+            nltk.download('punkt')
+            sentences = sent_tokenize(text)
         # Filter out empty sentences and sentences with less than 5 words
         sentences = [sentence for sentence in sentences if len(sentence.strip()) > 0 and len(sentence.split(" ")) > 4]