lithiumice committed
Commit 36aab19
1 Parent(s): 0dbf11e

add ht api

Files changed (5)
  1. .gitignore +2 -1
  2. app.py +23 -6
  3. requirements.txt +3 -1
  4. src/gradio_demo.py +1 -1
  5. src/utils/text2speech.py +20 -6
.gitignore CHANGED
@@ -153,7 +153,8 @@ dmypy.json
 cython_debug/
 
 results/
-# checkpoints/
+checkpoints/
+checkpoints_win/
 gradio_cached_examples/
 gfpgan/
 start.sh
app.py CHANGED
@@ -15,9 +15,17 @@ def download_model():
 
 def sadtalker_demo():
 
-    download_model()
-
-    sad_talker = SadTalker(lazy_load=True)
+    # The model path differs between running locally on my laptop and running on the server, so check the platform here
+    import platform
+    if platform.system() != 'Windows':
+        download_model()
+
+    sad_talker = SadTalker(
+        lazy_load=True,
+        checkpoint_path='./checkpoints' if platform.system() == 'Linux' else 'checkpoints_win',
+    )
+
+
     tts_talker = TTSTalker()
     tts_talker_ht = TTSTalkerPlayHT()
 
@@ -55,12 +63,16 @@ def sadtalker_demo():
             with gr.Tabs(elem_id="sadtalker_driven_audio"):
                 with gr.TabItem('Play.ht: Upload OR TTS'):
                     with gr.Column(variant='panel'):
+                        gr.Markdown("find more info here: https://playht.github.io/api-docs-generator/#standard-api-voices")
                         driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
+                        ht_user_id = gr.Textbox(label="Play.ht User ID", lines=1, placeholder="(required)", value='96tPb0H2cXbobV9u8iLVGyJPUPc2')
+                        ht_auth_key = gr.Textbox(label="Play.ht Auth Key", lines=1, placeholder="(required)", value='f35fc9d7ce0549a88f6cdc15ec860b6e')
+                        ht_voice = gr.Textbox(label="Play.ht Voice Type, see <Standard API Voices> section in API doc", lines=1, placeholder="(required)", value='en-US-MichelleNeural')
 
                     with gr.Column(variant='panel'):
                         input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
                         tts = gr.Button('Generate audio', elem_id="sadtalker_audio_generate", variant='primary')
-                        tts.click(fn=tts_talker_ht.test, inputs=[input_text], outputs=[driven_audio])
+                        tts.click(fn=tts_talker_ht.test, inputs=[input_text, ht_user_id, ht_auth_key, ht_voice], outputs=[driven_audio])
 
 
                 # origin TTS
@@ -159,10 +171,15 @@ def sadtalker_demo():
                         driven_audio,
                         preprocess_type,
                         is_still_mode,
-                        enhancer],
+                        enhancer,
+                        # ht_user_id,
+                        # ht_auth_key
+                        ],
                     outputs=[gen_video],
                     fn=sad_talker.test,
-                    cache_examples=os.getenv('SYSTEM') == 'spaces') #
+                    cache_examples=False
+                    # cache_examples=os.getenv('SYSTEM') == 'spaces'
+                    ) #
 
         submit.click(
             fn=sad_talker.test,
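
For context on the app.py wiring above: the three new gr.Textbox components are simply forwarded as extra positional inputs to TTSTalkerPlayHT.test via tts.click. Below is a minimal standalone sketch of that pattern (Gradio 3.x style, as used in the diff); the echo_tts callback is a stand-in for the real TTS class and is not part of the commit.

import gradio as gr

def echo_tts(text, ht_user_id, ht_auth_key, ht_voice):
    # Stand-in for TTSTalkerPlayHT.test: the real callback calls the Play.ht API
    # and returns a filepath to the generated audio.
    print(text, ht_user_id, ht_auth_key, ht_voice)
    return None

with gr.Blocks() as demo:
    with gr.Column(variant='panel'):
        driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
        ht_user_id = gr.Textbox(label="Play.ht User ID", lines=1, placeholder="(required)")
        ht_auth_key = gr.Textbox(label="Play.ht Auth Key", lines=1, placeholder="(required)")
        ht_voice = gr.Textbox(label="Play.ht Voice Type", lines=1, value='en-US-MichelleNeural')
        input_text = gr.Textbox(label="Generating audio from text", lines=5)
        tts = gr.Button('Generate audio', variant='primary')
        # Each component listed in inputs becomes one positional argument of the callback.
        tts.click(fn=echo_tts, inputs=[input_text, ht_user_id, ht_auth_key, ht_voice], outputs=[driven_audio])

demo.launch()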
requirements.txt CHANGED
@@ -20,4 +20,6 @@ basicsr==1.4.2
 facexlib==0.2.5
 dlib-bin
 gfpgan
-TTS==0.13.0
+TTS==0.13.0
+requests
+# tempfile
src/gradio_demo.py CHANGED
@@ -127,7 +127,7 @@ class SadTalker():
         del self.audio_to_coeff
         del self.animate_from_coeff
 
-        if torch.cuda.is_available() :
+        if torch.cuda.is_available():
             torch.cuda.empty_cache()
             torch.cuda.synchronize()
         import gc; gc.collect()
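
The gradio_demo.py change only tidies the post-inference cleanup, but the pattern it touches is worth spelling out: skip the CUDA calls entirely on CPU-only machines, where torch.cuda.synchronize() would fail. A minimal sketch of that guard outside the class (the function name free_memory is illustrative only):

import gc
import torch

def free_memory():
    # Only talk to the CUDA runtime when a GPU is present; on CPU-only
    # installs torch.cuda.synchronize() raises instead of being a no-op.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    gc.collect()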
src/utils/text2speech.py CHANGED
@@ -20,7 +20,6 @@ class TTSTalker():
 
         return tempf.name
 
-import urllib.request
 import tempfile
 import requests
 import json
@@ -36,16 +35,26 @@ class TTSTalkerPlayHT():
         text = 'hello world'
 
         self.url = "https://play.ht/api/v1"
+
+
+    def test(self, text,
+            ht_user_id = '96tPb0H2cXbobV9u8iLVGyJPUPc2',
+            ht_auth_key = 'f35fc9d7ce0549a88f6cdc15ec860b6e',
+            voice = 'en-US-MichelleNeural',
+            **kwargs):
+
+        print(f'ht_user_id {ht_user_id}')
+        print(f'ht_auth_key {ht_auth_key}')
+
         self.headers = {
-            'Authorization': 'f35fc9d7ce0549a88f6cdc15ec860b6e',
-            'X-User-ID': '96tPb0H2cXbobV9u8iLVGyJPUPc2',
+            'Authorization': ht_auth_key,
+            'X-User-ID': ht_user_id,
             'Content-Type': 'application/json'
         }
 
-    def test(self, text, language='en', **kwargs):
         payload = json.dumps({
             "title": "Testing public api convertion",
-            "voice": "en-US-MichelleNeural",
+            "voice": voice,
             "content": [text],
         })
         get_url = self.url+f'/convert'
@@ -55,6 +64,8 @@ class TTSTalkerPlayHT():
             headers=self.headers,
             data=payload)
 
+        print(f'convert response.status_code {response.status_code}')
+
         if response.status_code == 404:
             print('404')
             return
@@ -84,6 +95,8 @@ class TTSTalkerPlayHT():
                 headers=self.headers,
                 )
 
+            print(f'articleStatus response.status_code {response.status_code}')
+
             if response.status_code == 404:
                 print(response.text)
                 print('404')
@@ -93,7 +106,7 @@ class TTSTalkerPlayHT():
             data = json.loads(response.text)
             converted = data['converted']
             if converted != True:
-                time.sleep(0.5)
+                time.sleep(1.0)
                 continue
 
             # articleStatus indicates the conversion is finished
@@ -119,6 +132,7 @@ class TTSTalkerPlayHT():
 
         download_dropbox_url(audioUrl, tempf.name)
 
+        # import urllib.request
        # urllib.request.urlretrieve(audioUrl, tempf.name)
 
        # response = requests.get(audioUrl)
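
Taken together, the text2speech.py changes make TTSTalkerPlayHT.test accept per-request credentials and implement a submit / poll / download loop against Play.ht. A rough standalone sketch of that flow follows; note that the /articleStatus path, the transcriptionId parameter, and the audioUrl field are assumptions inferred from the prints and comments in the diff, not verified against the Play.ht docs.

import json
import tempfile
import time

import requests

PLAYHT_BASE = "https://play.ht/api/v1"

def playht_tts(text, ht_user_id, ht_auth_key, voice='en-US-MichelleNeural'):
    headers = {
        'Authorization': ht_auth_key,
        'X-User-ID': ht_user_id,
        'Content-Type': 'application/json',
    }

    # 1) Submit the conversion job (payload shape taken from the diff above).
    payload = json.dumps({"title": "tts request", "voice": voice, "content": [text]})
    resp = requests.post(PLAYHT_BASE + '/convert', headers=headers, data=payload)
    resp.raise_for_status()
    transcription_id = resp.json().get('transcriptionId')  # field name is an assumption

    # 2) Poll until the job reports converted == True (endpoint path is an assumption).
    while True:
        status = requests.get(PLAYHT_BASE + '/articleStatus',
                              params={'transcriptionId': transcription_id},
                              headers=headers).json()
        if status.get('converted'):
            audio_url = status['audioUrl']  # field name inferred from the audioUrl variable above
            break
        time.sleep(1.0)

    # 3) Download the audio to a temp file and return its path, as test() does.
    tempf = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    tempf.write(requests.get(audio_url).content)
    tempf.close()
    return tempf.name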