Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -153,38 +153,7 @@ def WikiSearch(term):
|
|
153 |
for item in termtoks:
|
154 |
# Search for the term on Wikipedia and get the first result
|
155 |
result = wikipedia.search(item, results=20)
|
156 |
-
return result
|
157 |
-
|
158 |
-
def find_string_positions(s, string):
    """Return the start index of every non-overlapping occurrence of ``string`` in ``s``.

    The text is scanned left to right; after a hit, the search resumes right
    after the end of that hit, so occurrences never overlap.

    Args:
        s: The text to search in.
        string: The substring to look for.

    Returns:
        A list of integer indices in ascending order. The list is empty when
        ``string`` does not occur in ``s`` or when ``string`` is empty.
    """
    # An empty needle "matches" at the current offset without ever advancing,
    # which would loop forever; treat it as "no occurrences" instead.
    if not string:
        return []

    positions = []
    step = len(string)
    position = s.find(string)
    while position != -1:
        positions.append(position)
        position = s.find(string, position + step)
    return positions
|
168 |
-
|
169 |
-
def splittext(string, split_positions):
    """Re-number and re-wrap the pieces of ``string`` found between split positions.

    ``string`` is cut 12 characters before each entry of ``split_positions``
    (presumably the width of an ``HH:MM:SS,mmm`` timestamp preceding a
    " --> " marker -- confirm against the caller). The piece before the first
    cut and everything after the last cut are not part of the output.

    Each remaining piece is rebuilt as its first 29 characters, a newline, and
    the rest of the piece from index 30 on (the character at index 29 is
    dropped). The rebuilt piece is then truncated at its second-to-last space
    and prefixed with a running number starting at 1.

    Args:
        string: The text to split.
        split_positions: Indices into ``string`` that mark where to cut.

    Returns:
        The numbered pieces, each followed by a blank line, with the first two
        characters of the combined text removed.

    Raises:
        IndexError: If a rebuilt piece contains fewer than two spaces.
    """
    # Slice the text into consecutive pieces, one per split position.
    chunks = []
    cut_from = 0
    for marker in split_positions:
        cut_to = marker - 12
        chunks.append(string[cut_from:cut_to])
        cut_from = cut_to

    linenumber = 1
    print(linenumber)

    rendered = []
    for chunk in chunks[1:]:
        # Line break after the first 29 characters; index 29 itself is skipped.
        cue = chunk[0:29] + "\n" + chunk[30:]
        space_positions = find_string_positions(cue, " ")
        # Keep everything up to (not including) the second-to-last space.
        rendered.append(str(linenumber) + "\n" + cue[:space_positions[-2]] + "\n" + "\n")
        linenumber += 1

    return "".join(rendered)[2:]
|
188 |
|
189 |
def create_dictionary(word_list, word_dict = {}):
|
190 |
word_list = set(word_list.split(" "))
|
@@ -262,6 +231,62 @@ def split_verbs_nouns(text):
|
|
262 |
|
263 |
return verbs_nouns_text, other_words_text
|
264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
groupinput_text = gr.inputs.Textbox(lines=2, label="Enter a list of words")
|
266 |
groupoutput_text = gr.outputs.Textbox(label="Grouped words")
|
267 |
|
@@ -311,7 +336,8 @@ with gr.Blocks() as lliface:
|
|
311 |
with gr.Tab("Advanced - LingQ Addons ideas"):
|
312 |
gr.HTML("Extra functions needed - Persitent Sentence translation, UNWFWO, POS tagging and Word Count per user of words in their account. Macaronic Text is also another way to practice only the important information")
|
313 |
with gr.Tab("Merged Subtitles"):
|
314 |
-
gr.
|
|
|
315 |
with gr.Row():
|
316 |
RomanFile = gr.File(label="Paste Roman")
|
317 |
W4WFile = gr.File(label="Paste Word 4 Word")
|
|
|
153 |
for item in termtoks:
|
154 |
# Search for the term on Wikipedia and get the first result
|
155 |
result = wikipedia.search(item, results=20)
|
156 |
+
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
def create_dictionary(word_list, word_dict = {}):
|
159 |
word_list = set(word_list.split(" "))
|
|
|
231 |
|
232 |
return verbs_nouns_text, other_words_text
|
233 |
|
234 |
+
def split_srt_file(file_path):
    """Return the text of an SRT file with the words of each cue joined by " | ".

    The file is split into sections on blank lines. In every section that has
    at least three lines, the first line and the second line (the timestamp)
    are kept as they are, and all remaining lines are merged into one line
    whose words are separated by " | ". Sections with fewer than three lines
    (for example the empty section produced by a trailing blank line) are
    passed through unchanged.

    Args:
        file_path: Path of the SRT file. An object that is not a path (such as
            an open file) is also accepted when it exposes the path through a
            ``name`` attribute.

    Returns:
        The rewritten file contents as a single string, sections separated by
        blank lines.
    """
    import os

    # An upload widget may hand over a file object instead of a path; read
    # from the path stored in its ``name`` attribute in that case.
    if not isinstance(file_path, (str, bytes, os.PathLike, int)):
        file_path = getattr(file_path, "name", file_path)

    # utf-8-sig also strips a leading byte-order mark, which would otherwise
    # end up glued to the first cue number.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        srt_contents = f.read()

    rewritten = []
    for section in srt_contents.split('\n\n'):
        section_lines = section.split('\n')
        if len(section_lines) < 3:
            # No subtitle text here (blank or incomplete section).
            rewritten.append(section)
            continue

        cue_number, timestamp, *text_lines = section_lines
        # Splitting on whitespace keeps the separator uniform across line
        # breaks and repeated spaces, and never cuts into the text itself.
        words = [word for line in text_lines for word in line.split()]
        subtitle_text = ' | '.join(words)
        rewritten.append(f"{cue_number}\n{timestamp}\n{subtitle_text}")

    return '\n\n'.join(rewritten)
|
257 |
+
|
258 |
+
def find_string_positions(s, string):
    """Return the start index of every non-overlapping occurrence of ``string`` in ``s``.

    The text is scanned left to right; after a hit, the search resumes right
    after the end of that hit, so occurrences never overlap.

    Args:
        s: The text to search in.
        string: The substring to look for.

    Returns:
        A list of integer indices in ascending order. The list is empty when
        ``string`` does not occur in ``s`` or when ``string`` is empty.
    """
    # An empty needle "matches" at the current offset without ever advancing,
    # which would loop forever; treat it as "no occurrences" instead.
    if not string:
        return []

    positions = []
    step = len(string)
    position = s.find(string)
    while position != -1:
        positions.append(position)
        position = s.find(string, position + step)
    return positions
|
268 |
+
|
269 |
+
def splittext(string):
    """Re-number and re-wrap the pieces of ``string`` found around " --> " markers.

    Every occurrence of " --> " is located and ``string`` is cut 12 characters
    before it (presumably the width of the ``HH:MM:SS,mmm`` timestamp in front
    of the arrow -- confirm against real input). The piece before the first
    cut and everything after the last cut are not part of the output.

    Each remaining piece is rebuilt as its first 29 characters, a newline, and
    the rest of the piece from index 30 on (the character at index 29 is
    dropped). The rebuilt piece is then truncated at its second-to-last space
    and prefixed with a running number starting at 1.

    Args:
        string: The text to split.

    Returns:
        The numbered pieces, each followed by a blank line, with the first two
        characters of the combined text removed.

    Raises:
        IndexError: If a rebuilt piece contains fewer than two spaces.
    """
    arrow_positions = find_string_positions(string, " --> ")

    # Slice the text into consecutive pieces, one per arrow.
    chunks = []
    cut_from = 0
    for arrow_at in arrow_positions:
        cut_to = arrow_at - 12
        chunks.append(string[cut_from:cut_to])
        cut_from = cut_to

    linenumber = 1
    print(linenumber)

    rendered = []
    for chunk in chunks[1:]:
        # Line break after the first 29 characters; index 29 itself is skipped.
        cue = chunk[0:29] + "\n" + chunk[30:]
        space_positions = find_string_positions(cue, " ")
        # Keep everything up to (not including) the second-to-last space.
        rendered.append(str(linenumber) + "\n" + cue[:space_positions[-2]] + "\n" + "\n")
        linenumber += 1

    return "".join(rendered)[2:]
|
289 |
+
|
290 |
groupinput_text = gr.inputs.Textbox(lines=2, label="Enter a list of words")
|
291 |
groupoutput_text = gr.outputs.Textbox(label="Grouped words")
|
292 |
|
|
|
336 |
with gr.Tab("Advanced - LingQ Addons ideas"):
|
337 |
gr.HTML("Extra functions needed - Persitent Sentence translation, UNWFWO, POS tagging and Word Count per user of words in their account. Macaronic Text is also another way to practice only the important information")
|
338 |
with gr.Tab("Merged Subtitles"):
|
339 |
+
gr.Interface(fn=split_srt_file, inputs="file", outputs="text", title="Text for w4w creation in G Translate")
|
340 |
+
# NOTE(review): splittext searches its argument as a string, while inputs="file" supplies an uploaded file -- confirm the intended input type.
gr.Interface(fn=splittext, inputs="file", outputs="text", title="Text for w4w creation in G Translate")
|
341 |
with gr.Row():
|
342 |
RomanFile = gr.File(label="Paste Roman")
|
343 |
W4WFile = gr.File(label="Paste Word 4 Word")
|