Spaces:

CVPR
/

ml-talking-face

Running

App Files Files Community

형규 송 committed on Jun 17, 2022

Commit

6d97505

•

1 Parent(s): 6bd388c

add length checker

Browse files

(`c838b62` in https://bitbucket.org/maum-system/cvpr22-demo-gradio)

Files changed (3) hide show

app.py +21 -12
docs/description.md +4 -0
translator/module.py +23 -12

app.py CHANGED Viewed

@@ -25,10 +25,11 @@ DESCRIPTION = Path("docs/description.md").read_text()
 class GradioApplication:
     def __init__(self, rest_ip, rest_port, max_seed):
         self.lang_list = {
-            'Korean': 'ko_KR',
-            'English': 'en_US',
-            'Japanese': 'ja_JP',
-            'Chinese': 'zh_CN'
         }
         self.background_list = [None,
                                 "background_image/cvpr.png",
@@ -85,14 +86,14 @@ class GradioApplication:
         return background_data, is_video_background
     @staticmethod
-    def return_format(toxicity_prob, target_text, lang_dest, video_filename):
-        return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: \n{target_text}", str(video_filename)
     def infer(self, text, lang, duration_rate, action, background_index):
         self._counter_file_seed()
         print(f"File Seed: {self._file_seed}")
         toxicity_prob = 0.0
-        target_text = "(Sorry, it seems that the input text is too toxic.)"
         lang_dest = ""
         video_filename = "vacant.mp4"
@@ -103,16 +104,24 @@ class GradioApplication:
             pass
         if toxicity_prob > TOXICITY_THRESHOLD:
-            return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
         # Google Translate API
         try:
             target_text, lang_dest = self.translator.get_translation(text, lang)
-            lang_rpc_code = self.get_lang_code(lang_dest)
         except Exception as e:
-            target_text = f"Error from language translation: ({e})"
             lang_dest = ""
-            return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
         # Video Inference
         background_data, is_video_background = self.get_background_data(background_index)
@@ -125,7 +134,7 @@ class GradioApplication:
         with open(video_filename, "wb") as video_file:
             video_file.write(video_data)
-        return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: \n{target_text}", str(video_filename)
     def run(self, server_port=7860, share=False):
         try:

 class GradioApplication:
     def __init__(self, rest_ip, rest_port, max_seed):
         self.lang_list = {
+            'ko': 'ko_KR',
+            'en': 'en_US',
+            'ja': 'ja_JP',
+            'zh': 'zh_CN',
+            'zh-CN': 'zh_CN'
         }
         self.background_list = [None,
                                 "background_image/cvpr.png",
         return background_data, is_video_background
     @staticmethod
+    def return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=""):
+        return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: {target_text}\nDetails: {detail}", str(video_filename)
     def infer(self, text, lang, duration_rate, action, background_index):
         self._counter_file_seed()
         print(f"File Seed: {self._file_seed}")
         toxicity_prob = 0.0
+        target_text = ""
         lang_dest = ""
         video_filename = "vacant.mp4"
             pass
         if toxicity_prob > TOXICITY_THRESHOLD:
+            detail = "Sorry, it seems that the input text is too toxic."
+            return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=detail)
         # Google Translate API
         try:
             target_text, lang_dest = self.translator.get_translation(text, lang)
         except Exception as e:
+            target_text = ""
             lang_dest = ""
+            detail = f"Error from language translation: ({e})"
+            return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=detail)
+        try:
+            self.translator.length_check(lang_dest, target_text)  # assertion check
+        except AssertionError as e:
+            return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=str(e))
+        lang_rpc_code = self.get_lang_code(lang_dest)
         # Video Inference
         background_data, is_video_background = self.get_background_data(background_index)
         with open(video_filename, "wb") as video_file:
             video_file.write(video_data)
+        return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
     def run(self, server_port=7860, share=False):
         try:

docs/description.md CHANGED Viewed

@@ -5,6 +5,10 @@ If the input text language and the target language are different, the input text
 ### Updates
 (2022.06.17.) We were originally planning to support any input text. However, when checking the logs recently, we found that there were a lot of inappropriate input texts. So, we decided to filter the inputs based on toxicity using [Perspective API @Google](https://developers.perspectiveapi.com/s/). Now, if you enter a possibly toxic text, the video generation will fail. We hope you understand.
 (2022.06.05.) Due to the latency from HuggingFace Spaces and video rendering, it takes 15 ~ 30 seconds to get a video result.

 ### Updates
+(2022.06.17.) Thank you for visiting our demo!😊 This demo attracted a lot more attention than we anticipated. This, unfortunately, means that the computational burden is heavier than this demo was designed for. So, to maximize everyone's experience, we capped the length of the translated texts at:
+- 200 characters for English
+- 100 characters for Chinese, Japanese, and Korean.
 (2022.06.17.) We were originally planning to support any input text. However, when checking the logs recently, we found that there were a lot of inappropriate input texts. So, we decided to filter the inputs based on toxicity using [Perspective API @Google](https://developers.perspectiveapi.com/s/). Now, if you enter a possibly toxic text, the video generation will fail. We hope you understand.
 (2022.06.05.) Due to the latency from HuggingFace Spaces and video rendering, it takes 15 ~ 30 seconds to get a video result.

translator/module.py CHANGED Viewed

@@ -1,17 +1,34 @@
 from .v3 import GoogleAuthTranslation
 from pathlib import Path
 import yaml
 class Translator:
     def __init__(self, yaml_path='./lang.yaml'):
         self.google_translation = GoogleAuthTranslation(project_id="cvpr-2022-demonstration")
         with open(yaml_path) as f:
             self.supporting_languages = yaml.load(f, Loader=yaml.FullLoader)
     def _get_text_with_lang(self, text, lang):
         lang_detected = self.google_translation.detect(text)
-        print(lang_detected, lang)
         if lang is None:
             lang = lang_detected
@@ -19,22 +36,16 @@ class Translator:
             target_text = self.google_translation.translate(text, lang=lang)
         else:
             target_text = text
         return target_text, lang
     def _convert_lang_from_index(self, lang):
         try:
-            lang_finder = [name for name in self.supporting_languages
-                            if self.supporting_languages[name]['language'] == lang]
         except Exception as e:
             raise RuntimeError(e)
-        if len(lang_finder) == 1:
-            lang = lang_finder[0]
-        else:
-            raise AssertionError("Given language index can't be understood!"
-                                 f"Only one of ['Korean', 'English', 'Japanese', 'Chinese'] can be supported. | lang: {lang}")
         return lang
     def get_translation(self, text, lang, use_translation=True):
@@ -45,4 +56,4 @@ class Translator:
         else:
             target_text = text
-        return target_text, lang

 from .v3 import GoogleAuthTranslation
 from pathlib import Path
 import yaml
+import os
# Length caps compared against len(text) in Translator.length_check: the first applies to
# 'en', the second to 'ko', 'ja', 'zh' and 'zh-CN'. Each is read from the environment
# variable of the same name and converted with int(); 200 and 100 are the fallbacks used
# when the variable is unset.
+MAX_ENG_TEXT_LENGTH = int(os.getenv('MAX_ENG_TEXT_LENGTH', 200))
+MAX_CJK_TEXT_LENGTH = int(os.getenv('MAX_CJK_TEXT_LENGTH', 100))
 class Translator:
     def __init__(self, yaml_path='./lang.yaml'):
         """Set up the translation client and load the language table.

         Args:
             yaml_path: Path of the YAML file whose parsed content is stored in
                 ``self.supporting_languages``.
         """
         # Build the translation client first, then read the language table.
         translation_client = GoogleAuthTranslation(project_id="cvpr-2022-demonstration")
         self.google_translation = translation_client
         with open(yaml_path) as language_file:
             # NOTE(review): FullLoader is fine for a trusted, repo-local file; do not
             # point yaml_path at untrusted input without switching to a safe loader.
             self.supporting_languages = yaml.load(language_file, Loader=yaml.FullLoader)
+    @staticmethod
+    def length_check(lang, text):
+        if lang in ['en']:
+            if len(text) > MAX_ENG_TEXT_LENGTH:
+                raise AssertionError(f"Input text is too long. For English, the text length should be less than {MAX_ENG_TEXT_LENGTH}. | Length: {len(text)}")
+        elif lang in ['ko', 'ja', 'zh-CN', 'zh']:
+            if len(text) > MAX_CJK_TEXT_LENGTH:
+                raise AssertionError(f"Input text is too long. For CJK, the text length should be less than {MAX_CJK_TEXT_LENGTH}. | Length: {len(text)}")
+        else:
+            raise AssertionError(f"Not in ['ko', 'ja', 'zh-CN', 'zh', 'en'] ! | Language: {lang}")
+        return
     def _get_text_with_lang(self, text, lang):
         lang_detected = self.google_translation.detect(text)
+        print(f"Detected as: {lang_detected} | Destination: {lang}")
         if lang is None:
             lang = lang_detected
             target_text = self.google_translation.translate(text, lang=lang)
         else:
             target_text = text
         return target_text, lang
     def _convert_lang_from_index(self, lang):
         try:
+            lang = [name for name in self.supporting_languages
+                    if self.supporting_languages[name]['language'] == lang][0]
         except Exception as e:
             raise RuntimeError(e)
         return lang
     def get_translation(self, text, lang, use_translation=True):
         else:
             target_text = text
+        return target_text, lang_