Spaces:

eepj
/

wstcg-mt

Running

App Files Files Community

Pan committed on Apr 24, 2024

Commit

dae6fae

1 Parent(s): 94aabbd

Update layout and string post-processing function

Browse files

Files changed (2) hide show

app.py +11 -11
utils.py +0 -3

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ from transformers import pipeline
 import ast
 import os
-import re
 import utils
@@ -14,18 +13,16 @@ pipe.model.eval()
 def func(text_ja: str, sub_emoji: bool) -> str:
-    # Format the input string to replace emoji and newline characters
     text_ja = utils.format_input(text_ja)
-    # Split input text into sentence pairs
-    splits_ja = [s for s in re.findall(r'『.+』|【.+?】.+?。|（.+?）|.+?。', text_ja) if s]
-    pairs_ja = [(splits_ja[i], splits_ja[i + 1]) if i + 1 < len(splits_ja)
-                else (splits_ja[i]) for i in range(0, len(splits_ja), 2)]
     # Iterate through each sentence pair
     segs_en = []
-    for pair_ja in pairs_ja:
         # Join a sentence pair
-        seg_ja = "".join(pair_ja)
         # Replace named entities with placeholder tokens (<NAME> and <TRAIT>)
         seg_ja, repls = utils.text_to_placeholder_tokens(seg_ja)
         # Translate the sentence pair
@@ -38,16 +35,18 @@ def func(text_ja: str, sub_emoji: bool) -> str:
         segs_en.append(seg_en)
     # Join the translated segments as one output string
-    text_en = " ".join(segs_en)
     return text_en
 with gr.Blocks() as app:
     with gr.Row():
         with gr.Column():
-            input_box = gr.TextArea(label="Original Card Text", info="Japanese")
-            output_box = gr.TextArea(label="Translated Card Text", info="English")
             sub_emoji = gr.Checkbox(True, label="Show Trigger Icon Emojis", info="Optional")
@@ -63,6 +62,7 @@ with gr.Blocks() as app:
                         inputs=[input_box, sub_emoji],
                         fn=func, outputs=[output_box],
                         label="Example Text", api_name=False, cache_examples=False)
         except (KeyError, SyntaxError, ValueError) as err:
             print(err)

 import ast
 import os
 import utils
 def func(text_ja: str, sub_emoji: bool) -> str:
+    # Format the input string to replace emoji
     text_ja = utils.format_input(text_ja)
+    # Split text by line
+    splits_ja = [s for s in text_ja.splitlines() if s]
     # Iterate through each sentence pair
     segs_en = []
+    for s in splits_ja:
         # Join a sentence pair
+        seg_ja = "".join(s)
         # Replace named entities with placeholder tokens (<NAME> and <TRAIT>)
         seg_ja, repls = utils.text_to_placeholder_tokens(seg_ja)
         # Translate the sentence pair
         segs_en.append(seg_en)
     # Join the translated segments as one output string
+    text_en = "\n".join(segs_en)
     return text_en
 with gr.Blocks() as app:
     with gr.Row():
         with gr.Column():
+            input_box = gr.TextArea(label="Original Card Text",
+                                    info="Put each ability on a new line")
+            output_box = gr.TextArea(label="Translated Card Text")
             sub_emoji = gr.Checkbox(True, label="Show Trigger Icon Emojis", info="Optional")
                         inputs=[input_box, sub_emoji],
                         fn=func, outputs=[output_box],
                         label="Example Text", api_name=False, cache_examples=False)
         except (KeyError, SyntaxError, ValueError) as err:
             print(err)

utils.py CHANGED Viewed

@@ -3,8 +3,6 @@ from tokens import *
 def format_input(text_ja: str) -> str:
-    text_ja = re.sub("\n", "", text_ja)
     for token, emoji in TRIGGER_EMOJI_DICT.items():
         text_ja = re.sub(emoji, token, text_ja)
@@ -33,7 +31,6 @@ def placeholder_tokens_to_text(text_en: str, repls: dict[str, list[str]]) -> str
 def format_output(text_en: str, sub_emoji: bool = True) -> str:
-    text_en = re.sub(r'^([^【]+?】)', r'【 \1', text_en)
     text_en = re.sub(r'【\s(.+?)\s】\s?', r'【\1】', text_en)
     if sub_emoji:

 def format_input(text_ja: str) -> str:
     for token, emoji in TRIGGER_EMOJI_DICT.items():
         text_ja = re.sub(emoji, token, text_ja)
 def format_output(text_en: str, sub_emoji: bool = True) -> str:
     text_en = re.sub(r'【\s(.+?)\s】\s?', r'【\1】', text_en)
     if sub_emoji: