형규 송 committed on
Commit
6d97505
•
1 Parent(s): 6bd388c

add length checker

Browse files

(`c838b62` in https://bitbucket.org/maum-system/cvpr22-demo-gradio)

Files changed (3) hide show
  1. app.py +21 -12
  2. docs/description.md +4 -0
  3. translator/module.py +23 -12
app.py CHANGED
@@ -25,10 +25,11 @@ DESCRIPTION = Path("docs/description.md").read_text()
25
  class GradioApplication:
26
  def __init__(self, rest_ip, rest_port, max_seed):
27
  self.lang_list = {
28
- 'Korean': 'ko_KR',
29
- 'English': 'en_US',
30
- 'Japanese': 'ja_JP',
31
- 'Chinese': 'zh_CN'
 
32
  }
33
  self.background_list = [None,
34
  "background_image/cvpr.png",
@@ -85,14 +86,14 @@ class GradioApplication:
85
  return background_data, is_video_background
86
 
87
  @staticmethod
88
- def return_format(toxicity_prob, target_text, lang_dest, video_filename):
89
- return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: \n{target_text}", str(video_filename)
90
 
91
  def infer(self, text, lang, duration_rate, action, background_index):
92
  self._counter_file_seed()
93
  print(f"File Seed: {self._file_seed}")
94
  toxicity_prob = 0.0
95
- target_text = "(Sorry, it seems that the input text is too toxic.)"
96
  lang_dest = ""
97
  video_filename = "vacant.mp4"
98
 
@@ -103,16 +104,24 @@ class GradioApplication:
103
  pass
104
 
105
  if toxicity_prob > TOXICITY_THRESHOLD:
106
- return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
 
107
 
108
  # Google Translate API
109
  try:
110
  target_text, lang_dest = self.translator.get_translation(text, lang)
111
- lang_rpc_code = self.get_lang_code(lang_dest)
112
  except Exception as e:
113
- target_text = f"Error from language translation: ({e})"
114
  lang_dest = ""
115
- return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
 
 
 
 
 
 
 
 
116
 
117
  # Video Inference
118
  background_data, is_video_background = self.get_background_data(background_index)
@@ -125,7 +134,7 @@ class GradioApplication:
125
  with open(video_filename, "wb") as video_file:
126
  video_file.write(video_data)
127
 
128
- return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: \n{target_text}", str(video_filename)
129
 
130
  def run(self, server_port=7860, share=False):
131
  try:
25
  class GradioApplication:
26
  def __init__(self, rest_ip, rest_port, max_seed):
27
  self.lang_list = {
28
+ 'ko': 'ko_KR',
29
+ 'en': 'en_US',
30
+ 'ja': 'ja_JP',
31
+ 'zh': 'zh_CN',
32
+ 'zh-CN': 'zh_CN'
33
  }
34
  self.background_list = [None,
35
  "background_image/cvpr.png",
86
  return background_data, is_video_background
87
 
88
  @staticmethod
89
+ def return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=""):
90
+ return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: {target_text}\nDetails: {detail}", str(video_filename)
91
 
92
  def infer(self, text, lang, duration_rate, action, background_index):
93
  self._counter_file_seed()
94
  print(f"File Seed: {self._file_seed}")
95
  toxicity_prob = 0.0
96
+ target_text = ""
97
  lang_dest = ""
98
  video_filename = "vacant.mp4"
99
 
104
  pass
105
 
106
  if toxicity_prob > TOXICITY_THRESHOLD:
107
+ detail = "Sorry, it seems that the input text is too toxic."
108
+ return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=detail)
109
 
110
  # Google Translate API
111
  try:
112
  target_text, lang_dest = self.translator.get_translation(text, lang)
 
113
  except Exception as e:
114
+ target_text = ""
115
  lang_dest = ""
116
+ detail = f"Error from language translation: ({e})"
117
+ return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=detail)
118
+
119
+ try:
120
+ self.translator.length_check(lang_dest, target_text) # assertion check
121
+ except AssertionError as e:
122
+ return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=str(e))
123
+
124
+ lang_rpc_code = self.get_lang_code(lang_dest)
125
 
126
  # Video Inference
127
  background_data, is_video_background = self.get_background_data(background_index)
134
  with open(video_filename, "wb") as video_file:
135
  video_file.write(video_data)
136
 
137
+ return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
138
 
139
  def run(self, server_port=7860, share=False):
140
  try:
docs/description.md CHANGED
@@ -5,6 +5,10 @@ If the input text language and the target language are different, the input text
5
 
6
  ### Updates
7
 
 
 
 
 
8
  (2022.06.17.) We were originally planning to support any input text. However, when checking the logs recently, we found that there were a lot of inappropriate input texts. So, we decided to filter the inputs based on toxicity using [Perspective API @Google](https://developers.perspectiveapi.com/s/). Now, if you enter a possibly toxic text, the video generation will fail. We hope you understand.
9
 
10
  (2022.06.05.) Due to the latency from HuggingFace Spaces and video rendering, it takes 15 ~ 30 seconds to get a video result.
5
 
6
  ### Updates
7
 
8
+ (2022.06.17.) Thank you for visiting our demo!😊 This demo attracted a lot more attention than we anticipated. This, unfortunately, means that the computational burden is heavier than this demo was designed for. So, to maximize everyone's experience, we capped the length of the translated texts at:
9
+ - 200 characters for English
10
+ - 100 characters for Chinese, Japanese, and Korean.
11
+
12
  (2022.06.17.) We were originally planning to support any input text. However, when checking the logs recently, we found that there were a lot of inappropriate input texts. So, we decided to filter the inputs based on toxicity using [Perspective API @Google](https://developers.perspectiveapi.com/s/). Now, if you enter a possibly toxic text, the video generation will fail. We hope you understand.
13
 
14
  (2022.06.05.) Due to the latency from HuggingFace Spaces and video rendering, it takes 15 ~ 30 seconds to get a video result.
translator/module.py CHANGED
@@ -1,17 +1,34 @@
1
  from .v3 import GoogleAuthTranslation
2
  from pathlib import Path
3
  import yaml
 
4
 
 
 
5
 
6
  class Translator:
7
  def __init__(self, yaml_path='./lang.yaml'):
8
  self.google_translation = GoogleAuthTranslation(project_id="cvpr-2022-demonstration")
9
  with open(yaml_path) as f:
10
  self.supporting_languages = yaml.load(f, Loader=yaml.FullLoader)
11
-
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def _get_text_with_lang(self, text, lang):
13
  lang_detected = self.google_translation.detect(text)
14
- print(lang_detected, lang)
 
15
  if lang is None:
16
  lang = lang_detected
17
 
@@ -19,22 +36,16 @@ class Translator:
19
  target_text = self.google_translation.translate(text, lang=lang)
20
  else:
21
  target_text = text
22
-
23
  return target_text, lang
24
 
25
  def _convert_lang_from_index(self, lang):
26
  try:
27
- lang_finder = [name for name in self.supporting_languages
28
- if self.supporting_languages[name]['language'] == lang]
29
  except Exception as e:
30
  raise RuntimeError(e)
31
 
32
- if len(lang_finder) == 1:
33
- lang = lang_finder[0]
34
- else:
35
- raise AssertionError("Given language index can't be understood!"
36
- f"Only one of ['Korean', 'English', 'Japanese', 'Chinese'] can be supported. | lang: {lang}")
37
-
38
  return lang
39
 
40
  def get_translation(self, text, lang, use_translation=True):
@@ -45,4 +56,4 @@ class Translator:
45
  else:
46
  target_text = text
47
 
48
- return target_text, lang
1
  from .v3 import GoogleAuthTranslation
2
  from pathlib import Path
3
  import yaml
4
+ import os
5
 
6
+ MAX_ENG_TEXT_LENGTH = int(os.getenv('MAX_ENG_TEXT_LENGTH', 200))
7
+ MAX_CJK_TEXT_LENGTH = int(os.getenv('MAX_CJK_TEXT_LENGTH', 100))
8
 
9
  class Translator:
10
  def __init__(self, yaml_path='./lang.yaml'):
11
  self.google_translation = GoogleAuthTranslation(project_id="cvpr-2022-demonstration")
12
  with open(yaml_path) as f:
13
  self.supporting_languages = yaml.load(f, Loader=yaml.FullLoader)
14
+
15
+ @staticmethod
16
+ def length_check(lang, text):
17
+ if lang in ['en']:
18
+ if len(text) > MAX_ENG_TEXT_LENGTH:
19
+ raise AssertionError(f"Input text is too long. For English, the text length should be less than {MAX_ENG_TEXT_LENGTH}. | Length: {len(text)}")
20
+ elif lang in ['ko', 'ja', 'zh-CN', 'zh']:
21
+ if len(text) > MAX_CJK_TEXT_LENGTH:
22
+ raise AssertionError(f"Input text is too long. For CJK, the text length should be less than {MAX_CJK_TEXT_LENGTH}. | Length: {len(text)}")
23
+ else:
24
+ raise AssertionError(f"Not in ['ko', 'ja', 'zh-CN', 'zh', 'en'] ! | Language: {lang}")
25
+
26
+ return
27
+
28
  def _get_text_with_lang(self, text, lang):
29
  lang_detected = self.google_translation.detect(text)
30
+ print(f"Detected as: {lang_detected} | Destination: {lang}")
31
+
32
  if lang is None:
33
  lang = lang_detected
34
 
36
  target_text = self.google_translation.translate(text, lang=lang)
37
  else:
38
  target_text = text
39
+
40
  return target_text, lang
41
 
42
  def _convert_lang_from_index(self, lang):
43
  try:
44
+ lang = [name for name in self.supporting_languages
45
+ if self.supporting_languages[name]['language'] == lang][0]
46
  except Exception as e:
47
  raise RuntimeError(e)
48
 
 
 
 
 
 
 
49
  return lang
50
 
51
  def get_translation(self, text, lang, use_translation=True):
56
  else:
57
  target_text = text
58
 
59
+ return target_text, lang_