KwabsHug committed

Commit 8a460a9
Parent: 5fa4476

Update app.py

Files changed (1): app.py (+60, -60)
app.py CHANGED
@@ -87,17 +87,17 @@ def Wordchunker(word):
    return chunks

def BatchWordChunk(sentence):
    words = sentence.split(" ")
    FinalOutput = ""
    Currentchunks = ""
    ChunksasString = ""
    for word in words:
        ChunksasString = ""
        Currentchunks = Wordchunker(word)
        for chunk in Currentchunks:
            ChunksasString += chunk + " "
        FinalOutput += "\n" + ChunksasString
    return FinalOutput

# Translate from English to French
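For reference, a minimal sketch of how BatchWordChunk behaves; the Wordchunker body is not shown in this diff, so the fixed-size chunking below is an assumption:

# Assumed stand-in for Wordchunker (its real body is elsewhere in app.py):
def Wordchunker(word):
    return [word[i:i + 3] for i in range(0, len(word), 3)]

print(BatchWordChunk("hello world"))
# One line per word, chunks separated by spaces, e.g.:
# hel lo
# wor ld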
 
@@ -106,22 +106,22 @@ langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="C
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")

def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    FinalOutput = ""
    TransFinalOutput = ""
    if Chunkmode == "Chunks":
        FinalOutput += Sentencechunker(Text)
    if Chunkmode == "Reverse":
        FinalOutput += ReverseSentenceChunker(Text)
    if Chunkmode == "Three Word Chunks":
        FinalOutput += three_words_chunk(Text)
    if Chunkmode == "Spelling Chunks":
        FinalOutput += BatchWordChunk(Text)

    if Translate:
        TransFinalOutput = FinalOutput
        translated = translator.translate(TransFinalOutput, dest=langdest)
        FinalOutput += "\n" + translated.text
    return FinalOutput

# Define a function to filter out non-verb, noun, or adjective words
def filter_words(words):
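A usage sketch for FrontRevSentChunk; Sentencechunker, ReverseSentenceChunker, three_words_chunk, and translator are defined elsewhere in app.py, and the googletrans-style Translator here is an assumption:

# translator is assumed to be a googletrans-style instance:
# from googletrans import Translator
# translator = Translator()

# Chunk only, no translation:
print(FrontRevSentChunk("Spelling Chunks", False, "hello world", "de"))

# Chunk, then append a German machine translation of the chunked text:
print(FrontRevSentChunk("Spelling Chunks", True, "hello world", "de"))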
@@ -137,13 +137,13 @@ def filter_words(words):
    return filtered_words

def SepHypandSynExpansion(text):
    # Tokenize the text
    tokens = nltk.word_tokenize(text)
    NoHits = ""
    FinalOutput = ""

    # Find synonyms and hypernyms of each word in the text
    for token in tokens:
        synonyms = []
        hypernyms = []
        for synset in wordnet.synsets(token):
@@ -153,15 +153,15 @@ def SepHypandSynExpansion(text):
            NoHits += f"{token} | "
        else:
            FinalOutput += "\n" f"{token}: hypernyms={hypernyms}, synonyms={synonyms} \n"
    NoHits = set(NoHits.split(" | "))
    NoHits = filter_words(NoHits)
    NoHits = "Words to pay special attention to: \n" + str(NoHits)
    return NoHits, FinalOutput


def WikiSearch(term):
    termtoks = term.split(" ")

    for item in termtoks:
        # Search for the term on Wikipedia and get the first result
        result = wikipedia.search(item, results=20)
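Run standalone, SepHypandSynExpansion needs the NLTK resources it relies on; a sketch (the download calls are one-time setup, not part of app.py as shown):

import nltk
nltk.download("punkt")    # tokenizer data for nltk.word_tokenize
nltk.download("wordnet")  # lexical database behind wordnet.synsets

nohits, expansion = SepHypandSynExpansion("The quick brown fox jumps")
print(nohits)     # tokens WordNet returned no synsets for
print(expansion)  # one hypernym/synonym line per matched token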
@@ -180,13 +180,13 @@ def create_dictionary(word_list, word_dict = {}):
def merge_lines(roman_file, w4w_file, full_mean_file, macaronic_file):
    files = [roman_file, w4w_file, full_mean_file, macaronic_file]
    merged_lines = []

    with open(roman_file.name, "r") as f1, open(w4w_file.name, "r") as f2, \
         open(full_mean_file.name, "r") as f3, open(macaronic_file.name, "r") as f4:
        for lines in zip(f1, f2, f3, f4):
            merged_line = "\n".join(line.strip() for line in lines)
            merged_lines.append(merged_line)

    return "\n".join(merged_lines)

TTSLangOptions = gr.Dropdown(choices=["en", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt")
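merge_lines only uses the .name attribute of the Gradio file objects it receives, so any object with a .name path works for local testing; a sketch with invented filenames:

from types import SimpleNamespace

roman = SimpleNamespace(name="roman.txt")       # romanized lines
w4w = SimpleNamespace(name="w4w.txt")           # word-for-word gloss
full = SimpleNamespace(name="full_meaning.txt") # natural translation
maca = SimpleNamespace(name="macaronic.txt")    # mixed-language lines

# Line N of each file is stacked into one block, blocks joined by newlines:
print(merge_lines(roman, w4w, full, maca))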
@@ -340,26 +340,26 @@ def find_string_positions(s, string):
    return positions

def splittext(string):
    # Normalize the malformed arrow " -- > " to the standard SRT " --> "
    string_no_formaterror = string.replace(" -- > ", " --> ")
    split_positions = find_string_positions(string_no_formaterror, " --> ")
    split_strings = []
    prepos = 0
    for pos in split_positions:
        pos -= 12  # step back over the 12-character start timestamp (hh:mm:ss,mmm)
        split_strings.append(string_no_formaterror[prepos:pos])  #, string[pos:]))
        prepos = pos

    FinalOutput = ""
    stoutput = ""
    linenumber = 1
    #print(linenumber)
    for item in split_strings[1:]:
        stoutput = item[0:29] + "\n" + item[30:]  # split the 29-character timing line from the cue text
        stspaces = find_string_positions(stoutput, " ")
        FinalOutput += str(linenumber) + "\n" + stoutput[:stspaces[-2]] + "\n"
        FinalOutput += "\n"
        linenumber += 1
    return FinalOutput[2:]

def VideotoSegment(video_file, subtitle_file):
    # Read the subtitle file and extract the timings for each subtitle
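A usage sketch for splittext with invented input; it expects cue timing lines in the 12-character hh:mm:ss,mmm SRT format:

raw = (
    "00:00:01,000 --> 00:00:03,000\n"
    "First subtitle text here\n"
    "00:00:03,500 --> 00:00:05,000\n"
    "Second subtitle text here\n"
)
print(splittext(raw))
# Emits SRT-style numbered blocks for the detected cues.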
 