KwabsHug committed

Commit 8a460a9
Parent: 5fa4476

Update app.py

Files changed (1): app.py (+60, -60)
app.py CHANGED
@@ -87,17 +87,17 @@ def Wordchunker(word):
    return chunks

def BatchWordChunk(sentence):
    words = sentence.split(" ")
    FinalOutput = ""
    Currentchunks = ""
    ChunksasString = ""
    for word in words:
        ChunksasString = ""
        Currentchunks = Wordchunker(word)
        for chunk in Currentchunks:
            ChunksasString += chunk + " "
        FinalOutput += "\n" + ChunksasString
    return FinalOutput

# Translate from English to French
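For reference, a minimal sketch of how BatchWordChunk behaves; the Wordchunker body is not shown in this diff, so the fixed-size chunking below is an assumption:

# Assumed stand-in for Wordchunker (its real body is elsewhere in app.py):
def Wordchunker(word):
    return [word[i:i + 3] for i in range(0, len(word), 3)]

print(BatchWordChunk("hello world"))
# One line per word, chunks separated by spaces, e.g.:
# hel lo
# wor ld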
 
@@ -106,22 +106,22 @@ langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="C
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")

def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    FinalOutput = ""
    TransFinalOutput = ""
    if Chunkmode == "Chunks":
        FinalOutput += Sentencechunker(Text)
    if Chunkmode == "Reverse":
        FinalOutput += ReverseSentenceChunker(Text)
    if Chunkmode == "Three Word Chunks":
        FinalOutput += three_words_chunk(Text)
    if Chunkmode == "Spelling Chunks":
        FinalOutput += BatchWordChunk(Text)

    if Translate:
        TransFinalOutput = FinalOutput
        translated = translator.translate(TransFinalOutput, dest=langdest)
        FinalOutput += "\n" + translated.text
    return FinalOutput

# Define a function to filter out non-verb, noun, or adjective words
def filter_words(words):
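A usage sketch for FrontRevSentChunk; Sentencechunker, ReverseSentenceChunker, three_words_chunk, and translator are defined elsewhere in app.py, and the googletrans-style Translator here is an assumption:

# translator is assumed to be a googletrans-style instance:
# from googletrans import Translator
# translator = Translator()

# Chunk only, no translation:
print(FrontRevSentChunk("Spelling Chunks", False, "hello world", "de"))

# Chunk, then append a German machine translation of the chunked text:
print(FrontRevSentChunk("Spelling Chunks", True, "hello world", "de"))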
@@ -137,13 +137,13 @@ def filter_words(words):
    return filtered_words

def SepHypandSynExpansion(text):
    # Tokenize the text
    tokens = nltk.word_tokenize(text)
    NoHits = ""
    FinalOutput = ""

    # Find synonyms and hypernyms of each word in the text
    for token in tokens:
        synonyms = []
        hypernyms = []
        for synset in wordnet.synsets(token):
@@ -153,15 +153,15 @@ def SepHypandSynExpansion(text):
            NoHits += f"{token} | "
        else:
            FinalOutput += "\n" f"{token}: hypernyms={hypernyms}, synonyms={synonyms} \n"
    NoHits = set(NoHits.split(" | "))
    NoHits = filter_words(NoHits)
    NoHits = "Words to pay special attention to: \n" + str(NoHits)
    return NoHits, FinalOutput


def WikiSearch(term):
    termtoks = term.split(" ")

    for item in termtoks:
        # Search for the term on Wikipedia and get the first result
        result = wikipedia.search(item, results=20)
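Run standalone, SepHypandSynExpansion needs the NLTK resources it relies on; a sketch (the download calls are one-time setup, not part of app.py as shown):

import nltk
nltk.download("punkt")    # tokenizer data for nltk.word_tokenize
nltk.download("wordnet")  # lexical database behind wordnet.synsets

nohits, expansion = SepHypandSynExpansion("The quick brown fox jumps")
print(nohits)     # tokens WordNet returned no synsets for
print(expansion)  # one hypernym/synonym line per matched token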
@@ -180,13 +180,13 @@ def create_dictionary(word_list, word_dict = {}):
def merge_lines(roman_file, w4w_file, full_mean_file, macaronic_file):
    files = [roman_file, w4w_file, full_mean_file, macaronic_file]
    merged_lines = []

    with open(roman_file.name, "r") as f1, open(w4w_file.name, "r") as f2, \
         open(full_mean_file.name, "r") as f3, open(macaronic_file.name, "r") as f4:
        for lines in zip(f1, f2, f3, f4):
            merged_line = "\n".join(line.strip() for line in lines)
            merged_lines.append(merged_line)

    return "\n".join(merged_lines)

TTSLangOptions = gr.Dropdown(choices=["en", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt")
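merge_lines only uses the .name attribute of the Gradio file objects it receives, so any object with a .name path works for local testing; a sketch with invented filenames:

from types import SimpleNamespace

roman = SimpleNamespace(name="roman.txt")       # romanized lines
w4w = SimpleNamespace(name="w4w.txt")           # word-for-word gloss
full = SimpleNamespace(name="full_meaning.txt") # natural translation
maca = SimpleNamespace(name="macaronic.txt")    # mixed-language lines

# Line N of each file is stacked into one block, blocks joined by newlines:
print(merge_lines(roman, w4w, full, maca))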
@@ -340,26 +340,26 @@ def find_string_positions(s, string):
    return positions

def splittext(string):
    # Normalize the malformed arrow " -- > " to the standard SRT " --> "
    string_no_formaterror = string.replace(" -- > ", " --> ")
    split_positions = find_string_positions(string_no_formaterror, " --> ")
    split_strings = []
    prepos = 0
    for pos in split_positions:
        pos -= 12  # step back over the 12-character start timestamp (hh:mm:ss,mmm)
        split_strings.append(string_no_formaterror[prepos:pos])  #, string[pos:]))
        prepos = pos

    FinalOutput = ""
    stoutput = ""
    linenumber = 1
    #print(linenumber)
    for item in split_strings[1:]:
        stoutput = item[0:29] + "\n" + item[30:]  # split the 29-character timing line from the cue text
        stspaces = find_string_positions(stoutput, " ")
        FinalOutput += str(linenumber) + "\n" + stoutput[:stspaces[-2]] + "\n"
        FinalOutput += "\n"
        linenumber += 1
    return FinalOutput[2:]

def VideotoSegment(video_file, subtitle_file):
    # Read the subtitle file and extract the timings for each subtitle
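A usage sketch for splittext with invented input; it expects cue timing lines in the 12-character hh:mm:ss,mmm SRT format:

raw = (
    "00:00:01,000 --> 00:00:03,000\n"
    "First subtitle text here\n"
    "00:00:03,500 --> 00:00:05,000\n"
    "Second subtitle text here\n"
)
print(splittext(raw))
# Emits SRT-style numbered blocks for the detected cues.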
 