TenzinGayche committed on
Commit
75700af
1 Parent(s): 19ca4e3

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +10 -4
handler.py CHANGED
@@ -1,5 +1,6 @@
1
  from typing import Dict, Any,Union
2
  import librosa
 
3
  import numpy as np
4
  import torch
5
  import pyewts
@@ -10,6 +11,7 @@ from num2tib.core import convert2text
10
  import base64
11
  import re
12
  import requests
 
13
  converter = pyewts.pyewts()
14
  def download_file(url, destination):
15
  response = requests.get(url)
@@ -80,15 +82,19 @@ class EndpointHandler():
80
  text=cleanup_text(text)
81
  text=replace_numbers_with_convert(text)
82
  inputs = self.processor(text=text, return_tensors="pt")
83
- # limit input length
84
  input_ids = inputs["input_ids"]
85
  input_ids = input_ids[..., :self.model.config.max_text_positions]
86
  speaker_embedding = np.load(speaker_embeddings['Lhasa(female)'])
87
  speaker_embedding = torch.tensor(speaker_embedding)
88
  speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
89
  speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
 
 
 
 
 
 
90
  return {
91
  "sample_rate": 16000,
92
- "audio": base64.b64encode(speech.tostring()).decode("utf-8"),
93
-
94
- }
 
1
  from typing import Dict, Any,Union
2
  import librosa
3
+ import tempfile
4
  import numpy as np
5
  import torch
6
  import pyewts
 
11
  import base64
12
  import re
13
  import requests
14
+ import os
15
  converter = pyewts.pyewts()
16
  def download_file(url, destination):
17
  response = requests.get(url)
 
82
  text=cleanup_text(text)
83
  text=replace_numbers_with_convert(text)
84
  inputs = self.processor(text=text, return_tensors="pt")
 
85
  input_ids = inputs["input_ids"]
86
  input_ids = input_ids[..., :self.model.config.max_text_positions]
87
  speaker_embedding = np.load(speaker_embeddings['Lhasa(female)'])
88
  speaker_embedding = torch.tensor(speaker_embedding)
89
  speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
90
  speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
91
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
92
+ temp_wav_path = temp_wav_file.name
93
+ librosa.output.write_wav(temp_wav_path, speech.numpy(), sr=16000)
94
+ with open(temp_wav_path, "rb") as wav_file:
95
+ audio_base64 = base64.b64encode(wav_file.read()).decode("utf-8")
96
+ os.remove(temp_wav_path)
97
  return {
98
  "sample_rate": 16000,
99
+ "audio_base64": audio_base64,
100
+ }