Spaces:

Gradio-Blocks
/

magnificento

Runtime error

App Files Files Community

magnificento / app.py

muhtasham

Update app.py

5c5bdc4 about 2 years ago

raw history blame

No virus

4.49 kB

	import io, os, base64
	from PIL import Image
	import gradio as gr
	import shortuuid
	import numpy as np
	from transformers import pipeline

	# Models and remote Spaces used by the app; all of them are loaded once at
	# import time.

	# Speech recognition (default checkpoint for the task).
	asr = pipeline(task="automatic-speech-recognition")

	# Text-to-image generation, proxied to a hosted latent-diffusion Space.
	latent = gr.Interface.load(name="spaces/multimodalart/latentdiffusion")

	# Zero-shot image classification (default checkpoint for the task).
	zero = pipeline(task="zero-shot-image-classification")

	# NOTE: two hosted Spaces were previously tried here and are currently
	# disabled: "spaces/Datatrooper/zero-shot-image-classification" (as `zero`)
	# and "spaces/osanseviero/tortoisse-tts" (as `tts`).

	def text2image_latent(text, steps, width, height, images, diversity):
	    """Generate images for *text* and return the paths of the saved PNG files.

	    All arguments are forwarded unchanged to the ``latent`` interface. The
	    second element of its result is iterated; the first item of each entry is
	    treated as a base64 PNG data URL, decoded, and written to ``./tmp`` under
	    a random ``shortuuid`` file name.

	    Returns:
	        A list of file paths, one per decoded image.
	    """
	    print(text)
	    results = latent(text, steps, width, height, images, diversity)

	    # Create the output directory once, up front. ``exist_ok=True`` already
	    # covers the "directory exists" case, so no separate existence check
	    # (which would be race-prone) is needed.
	    temp_dir = './tmp'
	    os.makedirs(temp_dir, exist_ok=True)

	    image_paths = []
	    for image in results[1]:
	        # NOTE(review): assumes image[0] is a "data:image/png;base64,..."
	        # string, as the prefix removal below implies -- confirm against the
	        # remote Space's output format.
	        image_str = image[0].replace("data:image/png;base64,", "")
	        decoded_bytes = base64.decodebytes(image_str.encode("utf-8"))
	        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
	        Image.open(io.BytesIO(decoded_bytes)).save(image_path)
	        image_paths.append(image_path)
	    return image_paths


	def speech_to_text(mic=None, file=None, state=""):
	    """Transcribe an audio recording and append the text to the running state.

	    Args:
	        mic: Microphone recording to transcribe; takes precedence over *file*.
	        file: Uploaded audio to transcribe, used only when *mic* is ``None``.
	        state: Text accumulated by earlier calls.

	    Returns:
	        A ``(text, state)`` pair. After a successful transcription both items
	        are the updated accumulated text. When neither audio source is given,
	        the first item is an explanatory message and the state is returned
	        unchanged, so both paths yield the same two-value shape.
	    """
	    # Tolerate a missing/None state so the concatenation below cannot fail.
	    state = state or ""

	    audio = mic if mic is not None else file
	    if audio is None:
	        return "You must either provide a mic recording or a file", state

	    transcription = asr(audio)["text"]
	    # Append the transcription itself (the original referenced `text`, which
	    # is not a local of this function).
	    state += transcription + " "
	    return state, state

	def zero_shot(image, text_input):
	    """Classify *image* against the comma-separated labels in *text_input*.

	    Args:
	        image: Array-like image data; it is cast to ``uint8`` and converted to
	            an RGB PIL image before classification.
	        text_input: Candidate labels separated by commas.

	    Returns:
	        A dict mapping each label to the score reported by the ``zero``
	        zero-shot image-classification pipeline.
	    """
	    PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
	    # Strip surrounding whitespace so "cat, dog" yields "dog", not " dog".
	    labels = [part.strip() for part in text_input.split(",")]
	    # The pipeline is bound to the module-level name `zero`; the original
	    # referenced `pipe` and `labels_text`, neither of which exists here.
	    res = zero(images=PIL_image,
	               candidate_labels=labels,
	               hypothesis_template="This is a photo of a {}")
	    return {dic["label"]: dic["score"] for dic in res}

	def shot(image, labels_text):
	    """Classify *image* against the comma-separated labels in *labels_text*.

	    Args:
	        image: Array-like image data; it is cast to ``uint8`` and converted to
	            an RGB PIL image before classification.
	        labels_text: Candidate labels separated by commas.

	    Returns:
	        A dict mapping each label to the score reported by the ``zero``
	        zero-shot image-classification pipeline.
	    """
	    PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
	    # Strip surrounding whitespace so "cat, dog" yields "dog", not " dog".
	    labels = [part.strip() for part in labels_text.split(",")]
	    # The pipeline is bound to the module-level name `zero`; the original
	    # called `pipe`, which is not defined in this file.
	    res = zero(images=PIL_image,
	               candidate_labels=labels,
	               hypothesis_template="This is a photo of a {}")
	    return {dic["label"]: dic["score"] for dic in res}

	# UI definition: three columns (speech/text input, image generation,
	# zero-shot classification) plus the event wiring between them.
	with gr.Blocks() as demo:
	    gr.Markdown( """
	- 🎤 Input voice/text
	- ✨ Convert voice/text to image via Latent Diffusion
	- 🤖 Given list of labels and a selected image from gallery do zero-shot classification
	- 🎛️ Coming soon: TTS(audio) your output label as: Your output looks like "label of zero-shot"
	""")
	    with gr.Row():
	        # Column 1: audio in (microphone or upload) and the prompt text box.
	        with gr.Column():
	            # Order matches the handler signature speech_to_text(mic, file,
	            # state); the original list placed "state" second, which would
	            # have bound it to the `file` parameter.
	            # NOTE(review): the "state" string shortcut may not be accepted by
	            # Blocks event listeners; if it is not, replace both occurrences
	            # with one shared Gradio state component -- confirm against the
	            # installed Gradio version.
	            audio_file = [
	                gr.Audio(source="microphone", type="filepath", optional=True),
	                gr.Audio(source="upload", type="filepath", optional=True),
	                "state",
	            ]
	            text = gr.Textbox(label="Text", placeholder="If you dont want to record or upload your voice you can input text here")
	            with gr.Row():
	                # Named *_btn so it does not shadow the speech_to_text
	                # function that is used as the click handler below.
	                speech_to_text_btn = gr.Button("Speech to text go brrr", css={"margin-top": "1em"})
	        # Column 2: generation parameters, result gallery and trigger button.
	        with gr.Column():
	            steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=50,minimum=1,step=1)
	            width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
	            height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
	            images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=1, step=1, minimum=1, maximum=4)
	            diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
	            gallery = gr.Gallery(label="Individual images")
	            with gr.Row():
	                get_image_latent = gr.Button("Generate Image go brr")
	        # Column 3: candidate labels, classification output and trigger button.
	        with gr.Column():
	            text_input = gr.Textbox(label="Candidate labels", placeholder="input a list of labels separated by commas")
	            label = gr.Label()
	            with gr.Row():
	                zero_shot_clf = gr.Button("Classify Image go brr")

	    # Event wiring, kept inside the Blocks context.
	    speech_to_text_btn.click(speech_to_text, inputs=audio_file, outputs=[text, "state"])
	    get_image_latent.click(text2image_latent, inputs=[text, steps, width, height, images, diversity], outputs=gallery)
	    zero_shot_clf.click(zero_shot, inputs=[gallery, text_input], outputs=label)

	# `live` is an Interface constructor option; it is dropped here because
	# Blocks.launch() is not documented to accept it.
	demo.launch()