Spaces:

Statical-Archives
/

STC-TTS-Balacoon

Paused

App Files Files Community

STC-TTS-Balacoon / app.py

Staticaliza

Update app.py

56e4c28 verified 3 months ago

raw

history blame contribute delete

No virus

3.19 kB

	from typing import cast

	import gradio as gr
	from balacoon_tts import TTS
	from huggingface_hub import hf_hub_download, list_repo_files

	import os
	import io
	import wave
	import base64

	# Shared secret that callers must present to use the synthesizer. Read from
	# the environment so the key never lives in the source; it is None when the
	# "KEY" variable is unset.
	KEY = os.environ.get("KEY")

	# Maximum number of input characters synthesized per request; longer text is
	# truncated. (The misspelled name is kept because synthesize_audio refers to
	# it by this exact identifier.)
	default_max_lehgth = 250

	# Model file fetched from the "balacoon/tts" repository and the speaker that
	# pre-fills the UI.
	default_text_model = "en_us_hifi_jets_cpu.addon"
	default_text_speaker = "8051"

	# Download the model file and build the TTS engine once, at import time.
	model_path = hf_hub_download(repo_id = "balacoon/tts", filename = default_text_model)
	tts = TTS(model_path)
	base64_data = ""

	def audio_to_base64(sample_rate, audio_data):
	    """Encode raw audio samples as a base64 string of a mono 16-bit WAV file.

	    Args:
	        sample_rate: Frame rate written into the WAV header.
	        audio_data: Object exposing ``tobytes()``; the returned bytes are
	            written verbatim as the frame data (presumably an int16 NumPy
	            array - confirm against the caller).

	    Returns:
	        The complete WAV file (header + frames), base64-encoded and decoded
	        to a UTF-8 ``str``.
	    """
	    wav_stream = io.BytesIO()
	    writer = wave.open(wav_stream, 'w')
	    try:
	        writer.setnchannels(1)   # mono
	        writer.setsampwidth(2)   # 2 bytes per sample
	        writer.setframerate(sample_rate)
	        writer.writeframes(audio_data.tobytes())
	    finally:
	        # Closing finalizes the header sizes; the BytesIO stays readable.
	        writer.close()

	    return base64.b64encode(wav_stream.getvalue()).decode('utf-8')


	def synthesize_audio(access_key: str, text_str: str, text_model_str: str = "", text_speaker_str: str = "") -> list:
	    """Synthesize speech for ``text_str`` once ``access_key`` has been verified.

	    Args:
	        access_key: Key supplied by the caller; must equal the module-level
	            ``KEY``.
	        text_str: Text to speak. Truncated to ``default_max_lehgth``
	            characters.
	        text_model_str: Requested model name. Only logged here - the engine
	            is built once at import time from ``default_text_model``.
	        text_speaker_str: Speaker id passed to the engine. Falls back to
	            ``default_text_speaker`` when empty.

	    Returns:
	        A two-element list matching the two outputs wired to the Generate
	        button: an update for the audio component and the base64-encoded
	        WAV string. When the key is rejected or the text is empty the list
	        is ``[None, ""]`` so both outputs are simply cleared.
	    """
	    # Local import keeps this block self-contained; hmac is standard library.
	    import hmac

	    print(f">>> MODEL CALLED: Input: {text_str}, Model: {text_model_str}, Speaker: {text_speaker_str}")

	    # Constant-time comparison so response timing does not leak key contents.
	    # Non-string values (e.g. KEY unset -> None) are always rejected.
	    key_ok = (
	        isinstance(access_key, str)
	        and isinstance(KEY, str)
	        and hmac.compare_digest(access_key.encode("utf-8"), KEY.encode("utf-8"))
	    )
	    if not key_ok:
	        # Never echo the attempted key: logs must not collect secrets.
	        print(">>> MODEL FAILED: Invalid access key")
	        return [None, ""]

	    if not text_str:
	        return [None, ""]

	    # Slicing is a no-op for text that is already short enough.
	    text_str = text_str[:default_max_lehgth]
	    speaker = text_speaker_str or default_text_speaker

	    engine = cast(TTS, tts)
	    samples = engine.synthesize(text_str, speaker)
	    sampling_rate = engine.get_sampling_rate()

	    # gr.update is the component-agnostic way to set an output's value.
	    audio_update = gr.update(value = (sampling_rate, samples))
	    encoded_wav = audio_to_base64(sampling_rate, samples)

	    return [audio_update, encoded_wav]

	def main():
	    """Build the Gradio Blocks interface, wire the Generate button, and launch it."""
	    # NOTE(review): the source reached review with its indentation flattened,
	    # so the nesting of the Row/Column containers below is reconstructed -
	    # confirm the layout against the running app.
	    with gr.Blocks() as demo:
	        with gr.Row(variant = "panel"):
	            gr.Markdown("This is a basic Text-To-Speech (TTS) demo based on the Balacoon model.\n\n\nTo change the model / speaker, please refer to: https://huggingface.co/spaces/balacoon/tts")

	        # Text to synthesize and the access key.
	        with gr.Row():
	            with gr.Column(variant = "panel"):
	                text_box = gr.Textbox(label = "Text Input", placeholder = "Input ...")
	            with gr.Row(variant = "panel"):
	                key_box = gr.Textbox(label = "Access Key", lines = 1)

	        # Model and speaker selection, pre-filled with the module defaults.
	        with gr.Row():
	            with gr.Column(variant = "panel"):
	                model_box = gr.Textbox(label = "Model Input", placeholder = "Model ...", value = default_text_model)
	            with gr.Row(variant = "panel"):
	                speaker_box = gr.Textbox(label = "Speaker Input", placeholder = "Speaker ...", value = default_text_speaker)

	        with gr.Row(variant = "panel"):
	            generate_button = gr.Button("Generate")

	        with gr.Row(variant = "panel"):
	            audio_player = gr.Audio()

	        with gr.Row(variant = "panel"):
	            encoded_box = gr.Textbox(label = "Model Output", placeholder = "Output ...", value = "")

	        # Input order must match synthesize_audio's positional parameters.
	        handler_inputs = [key_box, text_box, model_box, speaker_box]
	        handler_outputs = [audio_player, encoded_box]
	        generate_button.click(synthesize_audio, inputs = handler_inputs, outputs = handler_outputs)

	    demo.launch()

	# Script entry point: start the app only when this file is executed directly.
	if __name__ == "__main__":
	    main()