"""Gradio demo: turn a text prompt into an image with latent diffusion.

The app exposes a single text input. The generated image is JPEG-encoded
and handed back to the UI as a base64 string.
"""
import base64
from io import BytesIO

import gradio as gr
import numpy as np
import PIL.Image
import torch
import torchvision
from diffusers import DiffusionPipeline

# Loaded once at import time so every request reuses the same pipeline.
ldm = DiffusionPipeline.from_pretrained("fusing/latent-diffusion-text2im-large")
# One seeded generator shared by every call to the pipeline.
generator = torch.manual_seed(42)


def _tensor_to_pil(batch):
    """Convert the first image of a pipeline output batch to a PIL image.

    NOTE(review): assumes ``batch`` is a float tensor laid out as
    (batch, channels, height, width) with values nominally in [0, 1] --
    confirm against the pipeline version in use.
    """
    # Move axis 1 to the end and scale to the 0-255 range.
    pixels = batch.cpu().permute(0, 2, 3, 1) * 255.0
    # Clamp before the uint8 cast: values outside [0, 255] would otherwise
    # wrap around and show up as speckle artifacts in the image.
    pixels = pixels.clamp(0, 255)
    return PIL.Image.fromarray(pixels.numpy().astype(np.uint8)[0])


def greet(name):
    """Generate an image for the prompt ``name``.

    Args:
        name: Text prompt forwarded to the diffusion pipeline.

    Returns:
        The generated image, JPEG-encoded and base64-encoded, as a str.
    """
    image = ldm(
        [name],
        generator=generator,
        eta=0.3,
        guidance_scale=6.0,
        num_inference_steps=50,
    )
    image_pil = _tensor_to_pil(image)

    # Serialize in memory rather than through a temporary file.
    buffered = BytesIO()
    image_pil.save(buffered, format="JPEG")
    encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")

    print(encoded)
    return encoded


# NOTE(review): this component is never passed to the Interface below, which
# returns the base64 payload as plain text; kept so module-level names are
# unchanged -- wire it in or remove it once the intended output is decided.
image = gr.Image(type="pil", label="Your result")

iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()