Update utils.py
Browse files
utils.py
CHANGED
@@ -24,7 +24,8 @@ import nltk
|
|
24 |
nltk.download('punkt')
|
25 |
nltk.download('averaged_perceptron_tagger')
|
26 |
from nltk.tokenize import sent_tokenize
|
27 |
-
import re
|
|
|
28 |
def welcome():
    """Return the fixed greeting string."""
    greeting = "Welcome All"
    return greeting
|
30 |
|
@@ -84,4 +85,14 @@ def get_translation(source, dest, text):
|
|
84 |
translation = translator.translate(i)
|
85 |
trans.append(translation)
|
86 |
|
87 |
-
return ' '.join(trans)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
nltk.download('punkt')
|
25 |
nltk.download('averaged_perceptron_tagger')
|
26 |
from nltk.tokenize import sent_tokenize
|
27 |
+
import re
|
28 |
+
import stanfordnlp
|
29 |
def welcome():
    """Return the fixed greeting string."""
    greeting = "Welcome All"
    return greeting
|
31 |
|
85 |
translation = translator.translate(i)
|
86 |
trans.append(translation)
|
87 |
|
88 |
+
return ' '.join(trans)
|
89 |
+
|
90 |
+
|
91 |
+
def truecasing_by_sentence_segmentation(input_text):
    """Capitalize the first letter of every sentence in ``input_text``.

    The text is split into sentences with NLTK's English ``sent_tokenize``,
    each sentence is passed through ``str.capitalize`` and the results are
    joined with single spaces. Finally, any space that directly precedes one
    of ``. , ' ! ? : ;`` is removed.

    Note: ``str.capitalize`` also lower-cases every character after the
    first one, so upper-case letters inside a sentence are not preserved.

    Args:
        input_text: The text to process.

    Returns:
        The re-cased text as a single string.
    """
    # Split the text into sentences.
    sentences = sent_tokenize(input_text, language='english')
    # Capitalize each sentence (first character upper, the rest lower).
    sentences_capitalized = [s.capitalize() for s in sentences]
    # Join the sentences and drop the space left in front of punctuation.
    # Raw string: "\." in a plain literal is an invalid escape sequence
    # (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
    return re.sub(r" (?=[\.,'!?:;])", "", ' '.join(sentences_capitalized))
|