lithiumice committed
Commit 36aab19
1 Parent(s): 0dbf11e

add ht api

Files changed (5)
  1. .gitignore +2 -1
  2. app.py +23 -6
  3. requirements.txt +3 -1
  4. src/gradio_demo.py +1 -1
  5. src/utils/text2speech.py +20 -6
.gitignore CHANGED
@@ -153,7 +153,8 @@ dmypy.json
 cython_debug/
 
 results/
-# checkpoints/
+checkpoints/
+checkpoints_win/
 gradio_cached_examples/
 gfpgan/
 start.sh
app.py CHANGED
@@ -15,9 +15,17 @@ def download_model():
 
 def sadtalker_demo():
 
-    download_model()
-
-    sad_talker = SadTalker(lazy_load=True)
+    # The model path differs between running locally on my laptop and running on the server, so check the platform here
+    import platform
+    if platform.system() != 'Windows':
+        download_model()
+
+    sad_talker = SadTalker(
+        lazy_load=True,
+        checkpoint_path='./checkpoints' if platform.system() == 'Linux' else 'checkpoints_win',
+    )
+
+
     tts_talker = TTSTalker()
     tts_talker_ht = TTSTalkerPlayHT()
 
@@ -55,12 +63,16 @@ def sadtalker_demo():
             with gr.Tabs(elem_id="sadtalker_driven_audio"):
                 with gr.TabItem('Play.ht: Upload OR TTS'):
                     with gr.Column(variant='panel'):
+                        gr.Markdown("find more info here: https://playht.github.io/api-docs-generator/#standard-api-voices")
                         driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
+                        ht_user_id = gr.Textbox(label="Play.ht User ID", lines=1, placeholder="(required)", value='96tPb0H2cXbobV9u8iLVGyJPUPc2')
+                        ht_auth_key = gr.Textbox(label="Play.ht Auth Key", lines=1, placeholder="(required)", value='f35fc9d7ce0549a88f6cdc15ec860b6e')
+                        ht_voice = gr.Textbox(label="Play.ht Voice Type, see <Standard API Voices> section in API doc", lines=1, placeholder="(required)", value='en-US-MichelleNeural')
 
                     with gr.Column(variant='panel'):
                         input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
                         tts = gr.Button('Generate audio', elem_id="sadtalker_audio_generate", variant='primary')
-                        tts.click(fn=tts_talker_ht.test, inputs=[input_text], outputs=[driven_audio])
+                        tts.click(fn=tts_talker_ht.test, inputs=[input_text, ht_user_id, ht_auth_key, ht_voice], outputs=[driven_audio])
 
 
                 # origin TTS
@@ -159,10 +171,15 @@ def sadtalker_demo():
                         driven_audio,
                         preprocess_type,
                         is_still_mode,
-                        enhancer],
+                        enhancer,
+                        # ht_user_id,
+                        # ht_auth_key
+                        ],
                     outputs=[gen_video],
                     fn=sad_talker.test,
-                    cache_examples=os.getenv('SYSTEM') == 'spaces') #
+                    cache_examples=False
+                    # cache_examples=os.getenv('SYSTEM') == 'spaces'
+                    ) #
 
         submit.click(
             fn=sad_talker.test,
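
For context on the app.py wiring above: the three new gr.Textbox components are simply forwarded as extra positional inputs to TTSTalkerPlayHT.test via tts.click. Below is a minimal standalone sketch of that pattern (Gradio 3.x style, as used in the diff); the echo_tts callback is a stand-in for the real TTS class and is not part of the commit.

import gradio as gr

def echo_tts(text, ht_user_id, ht_auth_key, ht_voice):
    # Stand-in for TTSTalkerPlayHT.test: the real callback calls the Play.ht API
    # and returns a filepath to the generated audio.
    print(text, ht_user_id, ht_auth_key, ht_voice)
    return None

with gr.Blocks() as demo:
    with gr.Column(variant='panel'):
        driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
        ht_user_id = gr.Textbox(label="Play.ht User ID", lines=1, placeholder="(required)")
        ht_auth_key = gr.Textbox(label="Play.ht Auth Key", lines=1, placeholder="(required)")
        ht_voice = gr.Textbox(label="Play.ht Voice Type", lines=1, value='en-US-MichelleNeural')
        input_text = gr.Textbox(label="Generating audio from text", lines=5)
        tts = gr.Button('Generate audio', variant='primary')
        # Each component listed in inputs becomes one positional argument of the callback.
        tts.click(fn=echo_tts, inputs=[input_text, ht_user_id, ht_auth_key, ht_voice], outputs=[driven_audio])

demo.launch()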
requirements.txt CHANGED
@@ -20,4 +20,6 @@ basicsr==1.4.2
 facexlib==0.2.5
 dlib-bin
 gfpgan
-TTS==0.13.0
+TTS==0.13.0
+requests
+# tempfile
src/gradio_demo.py CHANGED
@@ -127,7 +127,7 @@ class SadTalker():
         del self.audio_to_coeff
         del self.animate_from_coeff
 
-        if torch.cuda.is_available() :
+        if torch.cuda.is_available():
             torch.cuda.empty_cache()
             torch.cuda.synchronize()
         import gc; gc.collect()
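
The gradio_demo.py change only tidies the post-inference cleanup, but the pattern it touches is worth spelling out: skip the CUDA calls entirely on CPU-only machines, where torch.cuda.synchronize() would fail. A minimal sketch of that guard outside the class (the function name free_memory is illustrative only):

import gc
import torch

def free_memory():
    # Only talk to the CUDA runtime when a GPU is present; on CPU-only
    # installs torch.cuda.synchronize() raises instead of being a no-op.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    gc.collect()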
src/utils/text2speech.py CHANGED
@@ -20,7 +20,6 @@ class TTSTalker():
 
         return tempf.name
 
-import urllib.request
 import tempfile
 import requests
 import json
@@ -36,16 +35,26 @@ class TTSTalkerPlayHT():
         text = 'hello world'
 
         self.url = "https://play.ht/api/v1"
+
+
+    def test(self, text,
+            ht_user_id = '96tPb0H2cXbobV9u8iLVGyJPUPc2',
+            ht_auth_key = 'f35fc9d7ce0549a88f6cdc15ec860b6e',
+            voice = 'en-US-MichelleNeural',
+            **kwargs):
+
+        print(f'ht_user_id {ht_user_id}')
+        print(f'ht_auth_key {ht_auth_key}')
+
         self.headers = {
-            'Authorization': 'f35fc9d7ce0549a88f6cdc15ec860b6e',
-            'X-User-ID': '96tPb0H2cXbobV9u8iLVGyJPUPc2',
+            'Authorization': ht_auth_key,
+            'X-User-ID': ht_user_id,
             'Content-Type': 'application/json'
         }
 
-    def test(self, text, language='en', **kwargs):
         payload = json.dumps({
             "title": "Testing public api convertion",
-            "voice": "en-US-MichelleNeural",
+            "voice": voice,
             "content": [text],
         })
         get_url = self.url+f'/convert'
@@ -55,6 +64,8 @@ class TTSTalkerPlayHT():
             headers=self.headers,
             data=payload)
 
+        print(f'convert response.status_code {response.status_code}')
+
         if response.status_code == 404:
             print('404')
             return
@@ -84,6 +95,8 @@ class TTSTalkerPlayHT():
                 headers=self.headers,
                 )
 
+            print(f'articleStatus response.status_code {response.status_code}')
+
             if response.status_code == 404:
                 print(response.text)
                 print('404')
@@ -93,7 +106,7 @@ class TTSTalkerPlayHT():
             data = json.loads(response.text)
             converted = data['converted']
             if converted != True:
-                time.sleep(0.5)
+                time.sleep(1.0)
                 continue
 
             # articleStatus indicates the conversion is finished
@@ -119,6 +132,7 @@ class TTSTalkerPlayHT():
 
         download_dropbox_url(audioUrl, tempf.name)
 
+        # import urllib.request
        # urllib.request.urlretrieve(audioUrl, tempf.name)
 
        # response = requests.get(audioUrl)
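
Taken together, the text2speech.py changes make TTSTalkerPlayHT.test accept per-request credentials and implement a submit / poll / download loop against Play.ht. A rough standalone sketch of that flow follows; note that the /articleStatus path, the transcriptionId parameter, and the audioUrl field are assumptions inferred from the prints and comments in the diff, not verified against the Play.ht docs.

import json
import tempfile
import time

import requests

PLAYHT_BASE = "https://play.ht/api/v1"

def playht_tts(text, ht_user_id, ht_auth_key, voice='en-US-MichelleNeural'):
    headers = {
        'Authorization': ht_auth_key,
        'X-User-ID': ht_user_id,
        'Content-Type': 'application/json',
    }

    # 1) Submit the conversion job (payload shape taken from the diff above).
    payload = json.dumps({"title": "tts request", "voice": voice, "content": [text]})
    resp = requests.post(PLAYHT_BASE + '/convert', headers=headers, data=payload)
    resp.raise_for_status()
    transcription_id = resp.json().get('transcriptionId')  # field name is an assumption

    # 2) Poll until the job reports converted == True (endpoint path is an assumption).
    while True:
        status = requests.get(PLAYHT_BASE + '/articleStatus',
                              params={'transcriptionId': transcription_id},
                              headers=headers).json()
        if status.get('converted'):
            audio_url = status['audioUrl']  # field name inferred from the audioUrl variable above
            break
        time.sleep(1.0)

    # 3) Download the audio to a temp file and return its path, as test() does.
    tempf = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    tempf.write(requests.get(audio_url).content)
    tempf.close()
    return tempf.name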