Update app.py
app.py
CHANGED
@@ -60,17 +60,19 @@ from transformers import (
     AutoTokenizer,
     pipeline,
 )
-from melo.api import TTS
+# from melo.api import TTS
 
 # LM_model, LM_tokenizer = load("mlx-community/SmolLM-360M-Instruct")
 chat = Chat(2)
 chat.init_chat({"role": "system", "content": "You are a helpful and friendly AI assistant. You are polite, respectful, and aim to provide concise responses of less than 20 words."})
 user_role = "user"
 
-tts_model = TTS(language="EN_NEWEST", device="auto")
-speaker_id = tts_model.hps.data.spk2id["EN-Newest"]
+# tts_model = TTS(language="EN_NEWEST", device="auto")
+# speaker_id = tts_model.hps.data.spk2id["EN-Newest"]
 blocksize = 512
-
+with torch.no_grad():
+    wav = text2speech("Sid")["wav"]
+    # tts_model.tts_to_file("text", speaker_id, quiet=True)
 dummy_input = torch.randn(
     (3000),
     dtype=getattr(torch, "float16"),
@@ -192,9 +194,11 @@ def transcribe(stream, new_chunk):
     chat.append({"role": "assistant", "content": generated_text})
     text_str=generated_text
     # import pdb;pdb.set_trace()
-    audio_chunk = tts_model.tts_to_file(text_str, speaker_id, quiet=True)
+    audio_chunk = text2speech(text_str)["wav"]
+    # audio_chunk = tts_model.tts_to_file(text_str, speaker_id, quiet=True)
     audio_chunk = (audio_chunk * 32768).astype(np.int16)
-
+    print(text2speech.fs)
+    audio_output=(text2speech.fs, audio_chunk)
     print("--- %s seconds ---" % (time.time() - start_time))
     # else:
     #     audio_output=None
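Both hunks swap the MeloTTS backend for a text2speech callable that this diff does not define, so it has to be constructed elsewhere in app.py. The way it is used (called directly on a string, returning a dict with a "wav" entry, and exposing its sampling rate as text2speech.fs) matches ESPnet2's Text2Speech inference wrapper, so the sketch below shows how such an object could be built and driven the same way the commit drives it. The model tag, the device selection, and the explicit .cpu().numpy() conversion before the int16 scaling are assumptions, not taken from this commit.

# Rough sketch of a text2speech object compatible with how this commit uses it.
# Assumption: ESPnet2's Text2Speech wrapper; the model tag below is a placeholder,
# not necessarily the one this Space actually loads.
import numpy as np
import torch
from espnet2.bin.tts_inference import Text2Speech

text2speech = Text2Speech.from_pretrained(
    "kan-bayashi/ljspeech_vits",  # placeholder model tag (assumption)
    device="cuda" if torch.cuda.is_available() else "cpu",
)

# Warm-up call, mirroring wav = text2speech("Sid")["wav"] in the first hunk,
# so the first real request does not pay the model's initialisation cost.
with torch.no_grad():
    _ = text2speech("Sid")["wav"]

def synthesize(text_str):
    # Mirrors the second hunk: synthesize, scale the float waveform to int16,
    # and pair it with the model's sampling rate.
    with torch.no_grad():
        wav = text2speech(text_str)["wav"]  # torch.Tensor of float samples
    audio_chunk = (wav.cpu().numpy() * 32768).astype(np.int16)
    return (text2speech.fs, audio_chunk)

The (sampling_rate, int16_array) tuple returned here is the numpy format a Gradio Audio output component accepts, which is presumably why the commit builds audio_output as (text2speech.fs, audio_chunk).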