Siddhant committed on
Commit
bd1d7fa
1 Parent(s): e066930

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -60,17 +60,19 @@ from transformers import (
60
  AutoTokenizer,
61
  pipeline,
62
  )
63
- from melo.api import TTS
64
 
65
  # LM_model, LM_tokenizer = load("mlx-community/SmolLM-360M-Instruct")
66
  chat = Chat(2)
67
  chat.init_chat({"role": "system", "content": "You are a helpful and friendly AI assistant. You are polite, respectful, and aim to provide concise responses of less than 20 words."})
68
  user_role = "user"
69
 
70
- tts_model = TTS(language="EN_NEWEST", device="auto")
71
- speaker_id = tts_model.hps.data.spk2id["EN-Newest"]
72
  blocksize = 512
73
- tts_model.tts_to_file("text", speaker_id, quiet=True)
 
 
74
  dummy_input = torch.randn(
75
  (3000),
76
  dtype=getattr(torch, "float16"),
@@ -192,9 +194,11 @@ def transcribe(stream, new_chunk):
192
  chat.append({"role": "assistant", "content": generated_text})
193
  text_str=generated_text
194
  # import pdb;pdb.set_trace()
195
- audio_chunk = tts_model.tts_to_file(text_str, speaker_id, quiet=True)
 
196
  audio_chunk = (audio_chunk * 32768).astype(np.int16)
197
- audio_output=(44100, audio_chunk)
 
198
  print("--- %s seconds ---" % (time.time() - start_time))
199
  # else:
200
  # audio_output=None
 
60
  AutoTokenizer,
61
  pipeline,
62
  )
63
+ # from melo.api import TTS
64
 
65
  # LM_model, LM_tokenizer = load("mlx-community/SmolLM-360M-Instruct")
66
  chat = Chat(2)
67
  chat.init_chat({"role": "system", "content": "You are a helpful and friendly AI assistant. You are polite, respectful, and aim to provide concise responses of less than 20 words."})
68
  user_role = "user"
69
 
70
+ # tts_model = TTS(language="EN_NEWEST", device="auto")
71
+ # speaker_id = tts_model.hps.data.spk2id["EN-Newest"]
72
  blocksize = 512
73
+ with torch.no_grad():
74
+ wav = text2speech("Sid")["wav"]
75
+ # tts_model.tts_to_file("text", speaker_id, quiet=True)
76
  dummy_input = torch.randn(
77
  (3000),
78
  dtype=getattr(torch, "float16"),
 
194
  chat.append({"role": "assistant", "content": generated_text})
195
  text_str=generated_text
196
  # import pdb;pdb.set_trace()
197
+ audio_chunk = text2speech(text_str)["wav"]
198
+ # audio_chunk = tts_model.tts_to_file(text_str, speaker_id, quiet=True)
199
  audio_chunk = (audio_chunk * 32768).astype(np.int16)
200
+ print(text2speech.fs)
201
+ audio_output=(text2speech.fs, audio_chunk)
202
  print("--- %s seconds ---" % (time.time() - start_time))
203
  # else:
204
  # audio_output=None