Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,33 +1,37 @@
|
|
1 |
-
import os
|
2 |
import torch
|
3 |
import gradio as gr
|
4 |
import torchaudio
|
5 |
import time
|
6 |
from datetime import datetime
|
|
|
7 |
from tortoise.api import TextToSpeech
|
8 |
from tortoise.utils.text import split_and_recombine_text
|
9 |
from tortoise.utils.audio import load_audio, load_voice, load_voices
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
"shanuka",
|
14 |
-
"indian_f_1",
|
15 |
-
"kushan",
|
16 |
-
"afshak",
|
17 |
-
]
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
voice
|
22 |
-
):
|
23 |
-
if text is None or text.strip() == "":
|
24 |
-
raise gr.Error("Please provide text.")
|
25 |
|
26 |
-
|
|
|
|
|
27 |
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
|
|
|
|
31 |
|
32 |
for j, text in enumerate(texts):
|
33 |
for audio_frame in tts.tts_with_preset(
|
@@ -40,39 +44,29 @@ def inference(
|
|
40 |
yield (24000, audio_frame.cpu().detach().numpy())
|
41 |
|
42 |
def main():
|
43 |
-
title = "
|
44 |
-
description = ""
|
45 |
-
|
46 |
-
"""
|
47 |
-
text = gr.Textbox(
|
48 |
-
lines=4,
|
49 |
-
label="Text:",
|
50 |
-
)
|
51 |
|
52 |
voice = gr.Dropdown(
|
53 |
-
VOICE_OPTIONS, value="
|
54 |
)
|
55 |
|
56 |
-
output_audio = gr.Audio(label="streaming audio:", streaming=True, autoplay=True)
|
57 |
-
|
58 |
interface = gr.Interface(
|
59 |
-
fn=
|
60 |
inputs=[
|
61 |
-
|
62 |
voice
|
63 |
],
|
64 |
title=title,
|
65 |
description=description,
|
66 |
-
outputs=[
|
67 |
)
|
68 |
interface.queue().launch()
|
69 |
|
70 |
if __name__ == "__main__":
|
71 |
-
|
72 |
-
|
73 |
-
with open("Tortoise_TTS_Runs_Scripts.log", "a") as f:
|
74 |
f.write(
|
75 |
-
f"\n\n-------------------------
|
76 |
)
|
77 |
|
78 |
-
main()
|
|
|
|
|
1 |
import torch
|
2 |
import gradio as gr
|
3 |
import torchaudio
|
4 |
import time
|
5 |
from datetime import datetime
|
6 |
+
from transformers import pipeline
|
7 |
from tortoise.api import TextToSpeech
|
8 |
from tortoise.utils.text import split_and_recombine_text
|
9 |
from tortoise.utils.audio import load_audio, load_voice, load_voices
|
10 |
|
11 |
+
# STT setup: Whisper speech-recognition pipeline.
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_id = "openai/whisper-tiny"
# Pass the detected device instead of a hard-coded GPU index (device=0),
# which requires a CUDA device and breaks start-up on CPU-only hosts.
pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)

# TTS setup: voice names offered in the UI dropdown and the shared Tortoise model.
VOICE_OPTIONS = ["indian_F_1", "indian_F_2", "indian_F_3", "indian_M_1", "indian_M_2", "indian_M_3"]
# NOTE(review): use_deepspeed=True / half=True presumably also need a GPU — confirm
# before relying on the CPU fallback above for the TTS half of the app.
tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
|
20 |
|
21 |
+
def combined_inference(filepath, voice):
|
22 |
+
# STT: Convert audio to text
|
23 |
+
output = pipe(
|
24 |
+
filepath,
|
25 |
+
max_new_tokens=256,
|
26 |
+
generate_kwargs={"task": "transcribe", "language": "english"},
|
27 |
+
chunk_length_s=15,
|
28 |
+
batch_size=16,
|
29 |
+
)
|
30 |
+
text = output["text"]
|
31 |
|
32 |
+
# TTS: Convert text back to audio
|
33 |
+
texts = split_and_recombine_text(text)
|
34 |
+
voice_samples, conditioning_latents = load_voice(voice)
|
35 |
|
36 |
for j, text in enumerate(texts):
|
37 |
for audio_frame in tts.tts_with_preset(
|
|
|
44 |
yield (24000, audio_frame.cpu().detach().numpy())
|
45 |
|
46 |
def main():
    """Build the Gradio interface around ``combined_inference`` and launch it.

    The UI takes an uploaded audio file plus a voice selection and plays back
    the generated audio as a stream. The app is launched with queuing enabled.
    """
    # Components are created in the same order as before:
    # voice dropdown, input audio, output audio.
    voice_dropdown = gr.Dropdown(
        VOICE_OPTIONS,
        value="indian_F_1",
        label="Select voice:",
        type="value",
    )
    uploaded_audio = gr.Audio(source="upload", type="filepath")
    streamed_audio = gr.Audio(
        label="streaming audio:",
        streaming=True,
        autoplay=True,
    )

    demo = gr.Interface(
        fn=combined_inference,
        inputs=[uploaded_audio, voice_dropdown],
        title="Combined STT and TTS",
        description="",
        outputs=[streamed_audio],
    )

    # queue() is called before launch(), exactly as in the original chain.
    queued_demo = demo.queue()
    queued_demo.launch()
|
65 |
|
66 |
if __name__ == "__main__":
    # Append a timestamped banner to the run log before starting the app.
    run_banner = f"\n\n-------------------------Combined STT TTS Scripts Logs, {datetime.now()}-------------------------\n"
    with open("Combined_STT_TTS_Runs_Scripts.log", "a") as log_file:
        log_file.write(run_banner)

    main()
|