Spaces:

PolyAI
/

pheme

Runtime error

App Files Files Community

pheme / app.py

pfb30

Update app.py

e946294 verified 7 months ago

raw

history blame contribute delete

No virus

1.98 kB

	"""Simple demo app.

	Copyright PolyAI Limited.
	"""
	import time
	from pathlib import Path

	import gradio as gr

	from transformer_infer import PhemeClient, parse_arguments


	# Identifiers of the voices offered in the demo's voice dropdown.
	VOICE_OPTIONS = [
	    "male_voice",
	    "POD1000000004_S0000246",
	    "POD1000000018_S0000253",
	    "POD1000000048_S0000035",
	    "YOU1000000006_S0000051",
	    "YOU1000000044_S0000798",
	]

	# Module-level setup: runs at import time, before the UI is built.
	# Arguments come from transformer_infer.parse_arguments().
	args = parse_arguments()

	# Single shared client instance; inference() calls model.infer() on it.
	model = PhemeClient(args)


	def inference(
	    text,
	    voice,
	    top_k,
	    temperature
	):
	    """Generate audio for ``text`` with the selected ``voice``.

	    The request is first appended to ``PhemeVoice.log`` in the current
	    working directory, then ``model.infer`` is called and the elapsed
	    time of that call is printed to stdout.

	    Args:
	        text: Text to synthesise.
	        voice: Voice identifier forwarded to ``model.infer``.
	        top_k: Forwarded to ``model.infer`` as ``top_k``.
	        temperature: Forwarded to ``model.infer`` as ``temperature``.

	    Yields:
	        A single ``(samplerate, data)`` tuple, where ``samplerate`` is
	        16000 and ``data`` is whatever ``model.infer`` returned
	        (presumably the audio samples -- confirm against PhemeClient).
	    """
	    # Explicit UTF-8 so that non-ASCII user text cannot raise
	    # UnicodeEncodeError on hosts whose default locale encoding is narrower.
	    with open("PhemeVoice.log", "a", encoding="utf-8") as f:
	        f.write(f"{voice}: {text} \n")

	    # perf_counter() is monotonic, so the measured duration is not affected
	    # by system clock adjustments the way time.time() differences are.
	    start_time = time.perf_counter()
	    data = model.infer(
	        text, voice, top_k=top_k, temperature=temperature)
	    samplerate = 16_000
	    print("Time taken: ", time.perf_counter() - start_time)
	    yield (samplerate, data)


	def main():
	    """Build the Gradio interface for the Pheme demo and launch it."""
	    # The continuation lines of this literal keep their exact leading
	    # whitespace: it is part of the text handed to Gradio.
	    description = """Pheme Model can generate a variety of conversational voices in 16 kHz for phone-call applications.

	Paper: https://arxiv.org/pdf/2401.02839.pdf
	Github: https://github.com/PolyAI-LDN/pheme

	Voices are generated in a zero-shot manner, the model has never seen them before.
	"""

	    # Input widgets.
	    text_box = gr.Textbox(
	        lines=3,
	        value="I gotta say, I never expect that to happened. Um I had some expectations but you know.",
	        label="Text",
	    )
	    voice_picker = gr.Dropdown(
	        VOICE_OPTIONS,
	        value="POD1000000048_S0000035",
	        label="Select voice:",
	        type="value",
	    )
	    temperature_slider = gr.Slider(minimum=0.3, maximum=1.5, value=0.7, step=0.05)
	    top_k_slider = gr.Slider(minimum=10, maximum=250, value=210)

	    # Output widget.
	    audio_player = gr.Audio(label="audio:", autoplay=True)

	    # The inputs are listed in the same order as inference()'s parameters:
	    # text, voice, top_k, temperature.
	    demo = gr.Interface(
	        fn=inference,
	        inputs=[text_box, voice_picker, top_k_slider, temperature_slider],
	        outputs=[audio_player],
	        title="Pheme",
	        description=description,
	    )
	    queued_demo = demo.queue()
	    queued_demo.launch(share=True)


	if __name__ == "__main__":
	    # Start the demo only when this file is executed as a script.
	    main()