Pan committed on
Commit
dae6fae
·
1 Parent(s): 94aabbd

Update layout and string post-processing function

Browse files
Files changed (2) hide show
  1. app.py +11 -11
  2. utils.py +0 -3
app.py CHANGED
@@ -3,7 +3,6 @@ from transformers import pipeline
3
 
4
  import ast
5
  import os
6
- import re
7
 
8
  import utils
9
 
@@ -14,18 +13,16 @@ pipe.model.eval()
14
 
15
 
16
  def func(text_ja: str, sub_emoji: bool) -> str:
17
- # Format the input string to replace emoji and newline characters
18
  text_ja = utils.format_input(text_ja)
19
- # Split input text into sentence pairs
20
- splits_ja = [s for s in re.findall(r'『.+』|【.+?】.+?。|(.+?)|.+?。', text_ja) if s]
21
- pairs_ja = [(splits_ja[i], splits_ja[i + 1]) if i + 1 < len(splits_ja)
22
- else (splits_ja[i]) for i in range(0, len(splits_ja), 2)]
23
 
24
  # Iterate through each sentence pair
25
  segs_en = []
26
- for pair_ja in pairs_ja:
27
  # Join a sentence pair
28
- seg_ja = "".join(pair_ja)
29
  # Replace named entities with placeholder tokens (<NAME> and <TRAIT>)
30
  seg_ja, repls = utils.text_to_placeholder_tokens(seg_ja)
31
  # Translate the sentence pair
@@ -38,16 +35,18 @@ def func(text_ja: str, sub_emoji: bool) -> str:
38
  segs_en.append(seg_en)
39
 
40
  # Join the translated segments as one output string
41
- text_en = " ".join(segs_en)
42
 
43
  return text_en
44
 
45
 
46
  with gr.Blocks() as app:
 
47
  with gr.Row():
48
  with gr.Column():
49
- input_box = gr.TextArea(label="Original Card Text", info="Japanese")
50
- output_box = gr.TextArea(label="Translated Card Text", info="English")
 
51
 
52
  sub_emoji = gr.Checkbox(True, label="Show Trigger Icon Emojis", info="Optional")
53
 
@@ -63,6 +62,7 @@ with gr.Blocks() as app:
63
  inputs=[input_box, sub_emoji],
64
  fn=func, outputs=[output_box],
65
  label="Example Text", api_name=False, cache_examples=False)
 
66
  except (KeyError, SyntaxError, ValueError) as err:
67
  print(err)
68
 
 
3
 
4
  import ast
5
  import os
 
6
 
7
  import utils
8
 
 
13
 
14
 
15
  def func(text_ja: str, sub_emoji: bool) -> str:
16
+ # Format the input string to replace emoji
17
  text_ja = utils.format_input(text_ja)
18
+ # Split text by line
19
+ splits_ja = [s for s in text_ja.splitlines() if s]
 
 
20
 
21
  # Iterate through each sentence pair
22
  segs_en = []
23
+ for s in splits_ja:
24
  # Join a sentence pair
25
+ seg_ja = "".join(s)
26
  # Replace named entities with placeholder tokens (<NAME> and <TRAIT>)
27
  seg_ja, repls = utils.text_to_placeholder_tokens(seg_ja)
28
  # Translate the sentence pair
 
35
  segs_en.append(seg_en)
36
 
37
  # Join the translated segments as one output string
38
+ text_en = "\n".join(segs_en)
39
 
40
  return text_en
41
 
42
 
43
  with gr.Blocks() as app:
44
+
45
  with gr.Row():
46
  with gr.Column():
47
+ input_box = gr.TextArea(label="Original Card Text",
48
+ info="Put each ability on a new line")
49
+ output_box = gr.TextArea(label="Translated Card Text")
50
 
51
  sub_emoji = gr.Checkbox(True, label="Show Trigger Icon Emojis", info="Optional")
52
 
 
62
  inputs=[input_box, sub_emoji],
63
  fn=func, outputs=[output_box],
64
  label="Example Text", api_name=False, cache_examples=False)
65
+
66
  except (KeyError, SyntaxError, ValueError) as err:
67
  print(err)
68
 
utils.py CHANGED
@@ -3,8 +3,6 @@ from tokens import *
3
 
4
 
5
  def format_input(text_ja: str) -> str:
6
- text_ja = re.sub("\n", "", text_ja)
7
-
8
  for token, emoji in TRIGGER_EMOJI_DICT.items():
9
  text_ja = re.sub(emoji, token, text_ja)
10
 
@@ -33,7 +31,6 @@ def placeholder_tokens_to_text(text_en: str, repls: dict[str, list[str]]) -> str
33
 
34
 
35
  def format_output(text_en: str, sub_emoji: bool = True) -> str:
36
- text_en = re.sub(r'^([^【]+?】)', r'【 \1', text_en)
37
  text_en = re.sub(r'【\s(.+?)\s】\s?', r'【\1】', text_en)
38
 
39
  if sub_emoji:
 
3
 
4
 
5
  def format_input(text_ja: str) -> str:
 
 
6
  for token, emoji in TRIGGER_EMOJI_DICT.items():
7
  text_ja = re.sub(emoji, token, text_ja)
8
 
 
31
 
32
 
33
  def format_output(text_en: str, sub_emoji: bool = True) -> str:
 
34
  text_en = re.sub(r'【\s(.+?)\s】\s?', r'【\1】', text_en)
35
 
36
  if sub_emoji: