Spaces:

Gradio-Blocks
/

magnificento

Runtime error

App Files Files Community

magnificento / app.py

muhtasham

Update app.py

0283c36 over 2 years ago

raw

history blame

3.25 kB

	import io, os, base64
	from PIL import Image
	import gradio as gr
	import shortuuid
	from transformers import pipeline

	#input voice/text
	#convert text to image via latent diffusion
	#given list of labels and a selected image from gallery do zero-shot classification
	#tts your output label as: Your output looks like "label of zero-shot"

	# Models and remote Spaces are loaded once at import time and reused by the handlers.
	# Speech-recognition pipeline; no model is named, so presumably the library default is used.
	asr = pipeline("automatic-speech-recognition")
	# Remote text-to-image Space; text2image_latent calls it like a function.
	latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
	# Zero-shot image classifier; not referenced elsewhere in the visible code yet.
	zero = pipeline("zero-shot-image-classification")
	# Remote text-to-speech Space; not referenced elsewhere in the visible code yet.
	tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")

	def text2image_latent(text, steps, width, height, images, diversity):
	    """Generate images for a prompt and save them as PNG files.

	    All arguments are forwarded unchanged, in order, to the ``latent``
	    interface. The second element of its result is iterated; the first item
	    of each entry is treated as a base64 PNG data URI, decoded, and written
	    to ``./tmp`` under a random ``shortuuid`` file name.

	    Returns:
	        A list with the path of every PNG written, in result order.
	    """
	    print(text)
	    results = latent(text, steps, width, height, images, diversity)

	    # Create the output directory once, not per image. ``exist_ok=True``
	    # already covers the "directory exists" case, so no prior check is needed.
	    temp_dir = './tmp'
	    os.makedirs(temp_dir, exist_ok=True)

	    image_paths = []
	    for image in results[1]:
	        # Strip the data-URI header so only the base64 payload remains.
	        image_str = image[0].replace("data:image/png;base64,", "")
	        decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
	        img = Image.open(io.BytesIO(decoded_bytes))
	        # Build the path once and use it for both saving and returning.
	        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
	        img.save(image_path)
	        image_paths.append(image_path)
	    return image_paths


	def speech_to_text(mic=None, file=None):
	    """Transcribe a recording with the module-level ``asr`` pipeline.

	    ``mic`` is used when it is not ``None``; otherwise ``file`` is used.
	    When both are ``None`` a fixed help message is returned instead of a
	    transcription.

	    Returns:
	        The ``"text"`` entry of the ``asr`` result, or the help message.
	    """
	    # The microphone recording takes precedence over the uploaded file.
	    source = mic if mic is not None else file
	    if source is None:
	        return "You must either provide a mic recording or a file"
	    return asr(source)["text"]

	#def zero_shot(image, labels_text):



	# Build the UI: the left column collects audio/text input, the right column
	# holds the image-generation settings and the result gallery.
	with gr.Blocks() as demo:
	    with gr.Row():
	        with gr.Column():
	            # Two audio inputs, passed to speech_to_text as (mic, file).
	            audio_file = [
	                gr.Audio(source="microphone", type="filepath", optional=True),
	                gr.Audio(source="upload", type="filepath", optional=True)]
	            text = gr.Textbox(default="If you dont want to record or upload your voice you can input text here")
	            with gr.Row():
	                # Named *_button so it does not shadow the speech_to_text
	                # function, which must stay callable as the click handler.
	                speech_to_text_button = gr.Button("Speech to text go brrr")
	        with gr.Column():
	            steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=50,minimum=1,step=1)
	            width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
	            height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
	            images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=4, step=1, minimum=1, maximum=4)
	            diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
	            gallery = gr.Gallery(label="Individual images", show_label=True)
	            with gr.Row():
	                get_image_latent = gr.Button("Generate Image", css={"margin-top": "1em"})
	        #with gr.Column():

	        #with gr.Row():

	    # Wire the buttons: the transcription fills the textbox, and the textbox
	    # content plus slider values drive image generation into the gallery.
	    speech_to_text_button.click(speech_to_text, inputs=audio_file, outputs=text)
	    get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)


	# Start serving the Blocks app; request queueing is explicitly turned off.
	demo.launch(enable_queue=False)