Spaces:

NgalNgal
/

mT5-new

Running

App Files Files Community

NgalNgal committed on 6 days ago

Commit

d226f22

•

1 Parent(s): 0e5ca43

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -15

app.py CHANGED Viewed

@@ -98,20 +98,35 @@ def write_to_file_english(source):
     return segmented_contents
-def call_model_transformer(sources, direction_trans):
     if direction_trans == "English to Myanmar":
         ct_model_path = "enmy_ctranslate2/"
         sp_source_model_path = "enmy_ctranslate2/source.model"
         sp_target_model_path = "enmy_ctranslate2/target.model"
-        if sources == "" :
             gr.Warning("Please Enter English Text")
         else:
-            sp_source_model = sp.load("enmy_ctranslate2/source.model")
-            sp_target_model = sp.load("enmy_ctranslate2/target.model")
-            #sources_seg = write_to_file_english(sources)
             # Subword the source sentences
-            #print(sources_seg)
-            source_sents_subworded = sp.encode_as_pieces([sources])
             # Translate the source sentences
             translator = ctranslate2.Translator(ct_model_path, device="cpu")  # or "cuda" for GPU
             translations = translator.translate_batch(source_sents_subworded, batch_type="tokens", max_batch_size=4096)
@@ -123,6 +138,17 @@ def call_model_transformer(sources, direction_trans):
             # Desubword the target sentences
             translations_desubword = sp.decode(translations)
     elif direction_trans == "Myanmar to English":
         ct_model_path = "myen_ctranslate2/"
@@ -131,12 +157,26 @@ def call_model_transformer(sources, direction_trans):
         if sources == "" :
             gr.Warning("Please Enter Myanmar Text")
         else:
-            sp_source_model = sp.load(sp_source_model_path)
-            sp_target_model = sp.load(sp_target_model_path)
-            #translator = ctranslate2.Translator(ct_model_path)
-            sources_seg = write_to_file_myanmar(sources)
-            #Subword the source sentences
-            source_sents_subworded = sp.encode_as_pieces([sources_seg])
             # Translate the source sentences
             translator = ctranslate2.Translator(ct_model_path, device="cpu")  # or "cuda" for GPU
@@ -147,12 +187,24 @@ def call_model_transformer(sources, direction_trans):
             sp.load(sp_target_model_path)
             # Desubword the target sentences
-            translations_desubword = sp.decode(translations)
     else: gr.Warning("Please Select Language Direction")
-    return translations_desubword
 def translate_trans_myen(source, translator, sp_source_model, sp_target_model):

     return segmented_contents
+def call_model_transformer(source, direction_trans):
     if direction_trans == "English to Myanmar":
         ct_model_path = "enmy_ctranslate2/"
         sp_source_model_path = "enmy_ctranslate2/source.model"
         sp_target_model_path = "enmy_ctranslate2/target.model"
+        if source == "" :
             gr.Warning("Please Enter English Text")
         else:
+            #Set file paths
+            source_file_path = "write-input.txt"
+            target_file_path = "read-output.txt"
+            # Load the source SentencePiece model
+            sp = spm.SentencePieceProcessor()
+            sp.load(sp_source_model_path)
+            # write source to file
+            with open(source_file_path, "w", encoding="utf-8") as file:
+                file.write(source)
+            # Open the source file
+            with open(source_file_path, "r") as source:
+                  lines = source.readlines()
+            source_sents = [line.strip() for line in lines]
             # Subword the source sentences
+            source_sents_subworded = sp.encode_as_pieces(source_sents)
             # Translate the source sentences
             translator = ctranslate2.Translator(ct_model_path, device="cpu")  # or "cuda" for GPU
             translations = translator.translate_batch(source_sents_subworded, batch_type="tokens", max_batch_size=4096)
             # Desubword the target sentences
             translations_desubword = sp.decode(translations)
+            # Save the translations to a file
+            with open(target_file_path, "w+", encoding="utf-8") as target:
+                for line in translations_desubword:
+                    target.write(line.strip() + "\n")
+            #print("Done")
+            with open(target_file_path, "r", encoding="utf-8") as file:
+                segmented_contents = file.read()
     elif direction_trans == "Myanmar to English":
         ct_model_path = "myen_ctranslate2/"
         if sources == "" :
             gr.Warning("Please Enter Myanmar Text")
         else:
+            #Set file paths
+            source_file_path = "write-input.txt"
+            target_file_path = "read-output.txt"
+            # Load the source SentencePiece model
+            sp = spm.SentencePieceProcessor()
+            sp.load(sp_source_model_path)
+            # write source to file
+            with open(source_file_path, "w", encoding="utf-8") as file:
+                file.write(source)
+            # Open the source file
+            with open(source_file_path, "r") as source:
+                  lines = source.readlines()
+            source_sents = [line.strip() for line in lines]
+            # Subword the source sentences
+            source_sents_subworded = sp.encode_as_pieces(source_sents)
             # Translate the source sentences
             translator = ctranslate2.Translator(ct_model_path, device="cpu")  # or "cuda" for GPU
             sp.load(sp_target_model_path)
             # Desubword the target sentences
+            translations_desubword = sp.decode(translations)
+            # Save the translations to a file
+            with open(target_file_path, "w+", encoding="utf-8") as target:
+                for line in translations_desubword:
+                    target.write(line.strip() + "\n")
+            #print("Done")
+            with open(target_file_path, "r", encoding="utf-8") as file:
+                segmented_contents = file.read()
     else: gr.Warning("Please Select Language Direction")
+    return segmented_contents
 def translate_trans_myen(source, translator, sp_source_model, sp_target_model):