AdWeeb committed on
Commit
362b303
1 Parent(s): 36f869a

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +13 -2
utils.py CHANGED
@@ -24,7 +24,8 @@ import nltk
24
  nltk.download('punkt')
25
  nltk.download('averaged_perceptron_tagger')
26
  from nltk.tokenize import sent_tokenize
27
- import re
 
28
  def welcome():
29
  return "Welcome All"
30
 
@@ -84,4 +85,14 @@ def get_translation(source, dest, text):
84
  translation = translator.translate(i)
85
  trans.append(translation)
86
 
87
- return ' '.join(trans)
 
 
 
 
 
 
 
 
 
 
24
  nltk.download('punkt')
25
  nltk.download('averaged_perceptron_tagger')
26
  from nltk.tokenize import sent_tokenize
27
+ import re
28
+ import stanfordnlp
29
  def welcome():
30
  return "Welcome All"
31
 
85
  translation = translator.translate(i)
86
  trans.append(translation)
87
 
88
+ return ' '.join(trans)
89
+
90
+
91
def truecasing_by_sentence_segmentation(input_text: str) -> str:
    """Capitalize the first letter of every sentence in *input_text*.

    The text is split into sentences with NLTK's ``sent_tokenize`` (English
    model), each sentence is passed through ``str.capitalize()``, and the
    sentences are re-joined with single spaces.  Finally, any single space
    that sits directly before one of ``. , ' ! ? : ;`` is removed.

    NOTE: ``str.capitalize()`` also lower-cases every character after the
    first one, so capitals inside a sentence (names, acronyms) are not
    preserved.

    Args:
        input_text: The text to re-case.

    Returns:
        The re-cased text as a single string.
    """
    # Split the text into sentences.
    sentences = sent_tokenize(input_text, language='english')
    # Capitalize each sentence (first char upper, remainder lower).
    sentences_capitalized = [s.capitalize() for s in sentences]
    # Join the sentences, then drop a space that directly precedes
    # punctuation.  The pattern is a raw string so that "\." reaches the
    # regex engine as-is instead of being an invalid string escape.
    text_truecase = re.sub(
        r" (?=[\.,'!?:;])", "", ' '.join(sentences_capitalized)
    )
    return text_truecase