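"""Gradio front end for the voice_processing TTS pipeline.

Provides a single-conversion tab and a batch tab that accepts a JSON array of
conversion requests and runs them concurrently.
"""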
import asyncio
import base64
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO

import gradio as gr
import numpy as np
from scipy.io import wavfile

from voice_processing import tts, get_model_names, voice_mapping


async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Run a single TTS conversion and return (info dict, WAV data URI)."""
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        return {"error": f"Invalid voice '{selected_voice}'."}, None

    # Read the uploaded reference voice, if one was provided.
    voice_upload_file = None
    if use_uploaded_voice and voice_upload is not None:
        with open(voice_upload.name, 'rb') as f:
            voice_upload_file = f.read()

    info, edge_tts_output_path, tts_output_data, edge_output_file = await tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # tts_output_data is assumed to be a (sample_rate, audio) pair; using the
    # returned rate instead of a hard-coded 40000 keeps the WAV header correct
    # for models with other sample rates.
    sample_rate, audio_output = tts_output_data

    # Serialize NumPy audio to WAV bytes; anything else is assumed to already
    # be encoded audio bytes.
    if isinstance(audio_output, np.ndarray):
        byte_io = BytesIO()
        wavfile.write(byte_io, sample_rate, audio_output)
        byte_io.seek(0)
        audio_bytes = byte_io.read()
    else:
        audio_bytes = audio_output

    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
    return {"info": info}, audio_data_uri


def convert_tts_sync(*args):
    """Blocking wrapper so the async convert_tts can be used as a Gradio fn."""
    # asyncio.run creates a fresh event loop for this call and closes it when
    # done, so the wrapper is safe to call from worker threads as well.
    return asyncio.run(convert_tts(*args))


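# Example payload for the batch tab (illustrative values only; "model_name"
# must be one of get_model_names() and "voice" a key of voice_mapping):
#
# [
#     {
#         "model_name": "example_model",
#         "text": "Hello world",
#         "voice": "example_voice",
#         "slang_rate": 0.5,
#         "use_uploaded_voice": false,
#         "voice_upload": null
#     }
# ]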
def batch_convert_tts(json_input):
    """Convert a JSON array of requests concurrently and return one result per entry."""
    try:
        batch_data = json.loads(json_input)
    except Exception as e:
        return {"error": f"Failed to parse JSON input: {str(e)}"}

    if not isinstance(batch_data, list):
        return {"error": "JSON input must be an array of conversion requests."}

    results = []
    with ThreadPoolExecutor() as executor:
        future_to_entry = {
            executor.submit(
                convert_tts_sync,
                entry.get("model_name"),
                entry.get("text"),
                entry.get("voice"),
                entry.get("slang_rate", 0.5),
                entry.get("use_uploaded_voice", False),
                entry.get("voice_upload", None),
            ): entry
            for entry in batch_data
        }

        # Results are appended in completion order, not input order; the entry
        # is echoed back on failure so callers can tell which request failed.
        for future in as_completed(future_to_entry):
            entry = future_to_entry[future]
            try:
                info, audio_uri = future.result()
                results.append({"info": info, "audio_uri": audio_uri})
            except Exception as e:
                results.append({"error": str(e), "entry": entry})

    return results


def get_models():
    return get_model_names()


def get_voices():
    return list(voice_mapping.keys())


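# The single-conversion tab returns the info dict from tts() plus the audio as
# a base64 "data:audio/wav" URI in a Textbox, which keeps both outputs
# JSON-serializable for API callers.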
iface = gr.Interface(
    fn=convert_tts_sync,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        gr.Textbox(label="Text", placeholder="Enter text here"),
        gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
        gr.Checkbox(label="Use Uploaded Voice"),
        gr.File(label="Voice File"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Textbox(label="Audio URI"),
    ],
    title="Text-to-Speech Conversion",
    allow_flagging="never",
)


batch_iface = gr.Interface(
    fn=batch_convert_tts,
    inputs=gr.Textbox(label="JSON Input", lines=20, placeholder="Paste your JSON input here"),
    outputs=gr.JSON(label="Batch Results"),
    title="Batch Text-to-Speech Conversion",
    allow_flagging="never",
)


app = gr.TabbedInterface(
    interface_list=[iface, batch_iface],
    tab_names=["Single Conversion", "Batch Conversion"],
)

app.launch()