openpecha
/

speecht5-tts-01

Inference Endpoints

Model card Files Files and versions Community

TenzinGayche committed on Sep 28, 2023

Commit

37e5267

•

1 Parent(s): 049c36a

Update handler.py

Files changed (1) hide show

handler.py +10 -8

handler.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import  Dict
 import librosa
 import numpy as np
 import torch
@@ -52,14 +52,16 @@ class EndpointHandler():
         self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
-    def __call__(self, data: Dict[str]) -> Dict[str, str]:
-        """
         Args:
-            data (:obj:):
-                includes the deserialized audio file as bytes
-        Return:
-            A :obj:`dict`:. base64 encoded image
         """
         # process input
         if len(text.strip()) == 0:
@@ -75,4 +77,4 @@ class EndpointHandler():
         speaker_embedding = torch.tensor(speaker_embedding)
         speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=vocoder.to('cuda'))
         speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
-        return (16000, speech)

+from typing import  Dict, Any
 import librosa
 import numpy as np
 import torch
         self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+    def __call__(self, data: Dict[str, Any]) -> bytes:
+        """_summary_
         Args:
+            data (Dict[str, Any]): _description_
+        Returns:
+            bytes: _description_
         """
         # process input
         if len(text.strip()) == 0:
         speaker_embedding = torch.tensor(speaker_embedding)
         speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=vocoder.to('cuda'))
         speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
+        return speech.tobytes()