"""Gradio chatbot that routes user messages to image generation, background
removal, a "surprise space" link, or plain DialoGPT conversation.

Routing decisions are made by few-shot True/False prompts sent to the OpenAI
completion endpoint; free-form chat is produced by DialoGPT-large.
"""

import os
import random
import re

import gradio as gr
import openai
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

openai.api_key = os.environ["OAI_key"]

dialog_tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
dialog_model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")

# Latent Diffusion
latent_diff_interface = gr.Interface.load(
    "spaces/multimodalart/latentdiffusion", api_key="secret", enable_queue=True
)

# Background Remover
background_remove_interface = gr.Interface.load(
    "spaces/nateraw/background-remover", api_key="secret", enable_queue=True
)

# The classifier prompts ask for "True"/"False"; after stripping non-letters
# the answer is compared (case-sensitively) against this value.
_YES = str(True)

# Candidate replies for a bored user.
# NOTE(review): every entry is identical in this file; presumably each one
# originally carried a different link -- confirm against the deployed space.
_SURPRISE_SPACES = [
    " Click here for a suprise space ",
    " Click here for a suprise space ",
    " Click here for a suprise space ",
    " Click here for a suprise space ",
    " Click here for a suprise space ",
    " Click here for a suprise space ",
]


def _complete(prompt):
    """Send ``prompt`` to text-davinci-002 and return the last choice's text."""
    completion = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        temperature=0.7,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return completion["choices"][-1]["text"]


def _letters_only(text):
    """Drop every character of ``text`` that is not an ASCII letter."""
    return re.sub(r"[^A-Za-z]", "", text)


def is_generate_image(input):
    """Ask the completion model whether the user wants an image generated.

    Returns the model's answer with all non-letter characters removed, i.e. a
    *string* (``"True"`` when the model answers affirmatively), not a bool.
    Callers compare the result against ``str(True)``.
    """
    prompt = (
        "Prompt: Generate an image of a lion.\n"
        "Question: Generate an image? True or False?\n"
        "Answer:True.\n\n"
        "Prompt: Remove background of the image.\n"
        "Question: Generate an image? True or False?\n"
        "Answer:False.\n\n"
        "Prompt:" + input + "\n"
        "Question: Generate an image? True or False\n"
        "Answer:"
    )
    return _letters_only(_complete(prompt))


def generate_image(input):
    """Generate an image with latent diffusion from the user's message.

    The completion model first extracts the image description from ``input``;
    that description is then passed to the latent-diffusion interface.

    Returns a tuple ``(image, text)`` where ``image`` is the first element of
    the interface's output and ``text`` is the chat reply announcing what was
    generated.
    """
    # Find out what image to generate.
    prompt = (
        "Input: Generate an image of a \"lion\"\n"
        "Question: What is the prompt for image generation?\n"
        "Answer: lion\n\n"
        "Input: I am feeling good. Please send me an image of the sky with "
        "mountains by Greg Rutkowski.\n"
        "Question: What is the prompt for image generation?\n"
        "Answer: sky with mountains by Greg Rutkowski\n\n"
        "Input: " + input + "\n"
        "Question: What is the prompt for image generation?\n"
        "Answer:"
    )
    text_prompt_for_image = _complete(prompt)
    # NOTE(review): the positional numbers are forwarded verbatim to the
    # remote space; presumably steps/width/height/count/scale -- confirm
    # against the latentdiffusion space's input order.
    image_output = latent_diff_interface(text_prompt_for_image, 45, 256, 256, 1, 5)
    text_response_for_image = "Generated image for: " + text_prompt_for_image
    return image_output[0], text_response_for_image


def remove_background(input, generated_image=None, generate_image_flag="False"):
    """Remove the background of ``generated_image`` if the user asks for it.

    Args:
        input: The user's message.
        generated_image: Image produced on an earlier turn, or ``None``.
        generate_image_flag: Accepted for call compatibility; not used.

    Returns the background-remover interface's output, or ``None`` when there
    is no image to work on or the model does not answer ``"True"``.
    """
    # Without an image the result is always None, so skip the completion call.
    if generated_image is None:
        return None

    # Check if user is asking for removing background.
    prompt = (
        "Prompt: remove background.\n"
        "Question: Remove the background? True or False?\n"
        "Answer: True\n\n"
        "Prompt: Remove the background of image.\n"
        "Question: Remove the background? True or False?\n"
        "Answer: True\n\n"
        "Prompt: " + input + "\n"
        "Question: Remove the background? True or False?\n"
        "Answer:"
    )
    if _letters_only(_complete(prompt)) != _YES:
        return None
    return background_remove_interface(generated_image, 100)


def random_HF_space(input):
    """Return a randomly chosen surprise-space reply if the user seems bored.

    Returns ``None`` when the completion model does not answer ``"True"``.
    """
    prompt = (
        "User: I am feeling bored.\n"
        "Question: Is the user bored? True or False?\n"
        "Answer: True\n\n"
        "User: " + input + "\n"
        "Question: Is the user bored? True or False?\n"
        "Answer:"
    )
    if _letters_only(_complete(prompt)) != _YES:
        return None
    # random.choice keeps the pick in sync with the list length.
    return random.choice(_SURPRISE_SPACES)


def DialogGPT(input, history=None, text_response_for_image=None):
    """Advance the DialoGPT conversation by one turn.

    Args:
        input: The user's message.
        history: Token ids of the conversation so far, as returned by a
            previous call (``None`` or ``[]`` for a fresh conversation).
        text_response_for_image: When given, this text is appended as the
            bot's reply instead of asking the model to generate one.

    Returns a tuple ``(response, history)``: ``response`` is a list of
    ``(user_text, bot_text)`` pairs for the chatbot widget, ``history`` is
    the updated token-id list.
    """
    history = [] if history is None else history

    new_user_input_ids = dialog_tokenizer.encode(
        input + dialog_tokenizer.eos_token, return_tensors="pt"
    )
    bot_input_ids = torch.cat(
        [torch.LongTensor(history), new_user_input_ids], dim=-1
    )

    if text_response_for_image is not None:
        # Canned reply: splice its tokens in so the transcript stays coherent.
        canned_reply_ids = dialog_tokenizer.encode(
            text_response_for_image + dialog_tokenizer.eos_token,
            return_tensors="pt",
        )
        history = torch.cat([bot_input_ids, canned_reply_ids], dim=-1).tolist()
    else:
        history = dialog_model.generate(
            bot_input_ids,
            max_length=1000,
            pad_token_id=dialog_tokenizer.eos_token_id,
        ).tolist()

    # The decoded transcript alternates user/bot turns separated by the EOS
    # marker; pair them up (a trailing unpaired fragment is dropped).
    turns = dialog_tokenizer.decode(history[0]).split("<|endoftext|>")
    response = list(zip(turns[::2], turns[1::2]))
    return response, history


def predict(input, history=None):
    """Route one user message and produce the outputs for the Gradio UI.

    ``history`` is the state carried between calls. After an image-generation
    turn it holds three entries: the chat token ids, ``[flag]`` and
    ``[image]``; otherwise only the chat token ids.

    Returns ``(chat_pairs, history, surprise_space_or_None, image_or_None)``.
    """
    # NOTE: parameter names are kept as-is (including ``input``) because the
    # UI framework is handed this function directly.
    history = [] if history is None else history
    rs_output = None
    generated_image = None

    if len(history) > 1:
        # The previous turn generated an image: unpack it, restore the plain
        # chat history, and check whether the user wants its background gone.
        previously_generated_image = history[-1][0]
        previously_generated_image_flag = history[-2][0]
        history = [history[-3]]
        generated_image = remove_background(
            input, previously_generated_image, previously_generated_image_flag
        )

    if generated_image is not None:
        # Output as per removed background.
        generated_text, history = DialogGPT(input, history, "Background removed.")
        return generated_text, history, rs_output, generated_image

    # Check if user is asking for generating image.
    generate_image_flag = is_generate_image(input)
    if generate_image_flag == _YES:
        generated_image, text_response_for_image = generate_image(input)
        generated_text, history = DialogGPT(input, history, text_response_for_image)
        # Stash the flag and the image so the next turn can act on them.
        history.append([generate_image_flag])
        history.append([generated_image])
        return generated_text, history, rs_output, generated_image

    # Check if user is bored.
    rs_output = random_HF_space(input)
    if rs_output is not None:
        generated_text, history = DialogGPT(input, history, "Try out the below space")
    else:
        # Plain DialoGPT reply.
        generated_text, history = DialogGPT(input, history)
    return generated_text, history, rs_output, generated_image


examples = [
    ['Who is Elon Musk?'],
    ['Generate an image of horse running in grassland.'],
    ['Remove the background of the image.'],
    ['I am bored.'],
]

description = (
    "Combines the sanity of Langauge models with HF spaces to create a unique "
    "experience. The chatbot is capable of understanding the user input for "
    "image generation, image background removal & a random space link if you "
    "are bored. For other inputs, the space uses DialogGPT-large."
)

article = (
    "![visitor badge](https://visitor-badge.glitch.me/badge?page_id=gradio-blocks_LM_meets_HF) \n"
    "Head on to discussions if you have any feedback."
)

gr.Interface(
    fn=predict,
    inputs=["text", "state"],
    outputs=["chatbot", "state", gr.outputs.HTML(), gr.outputs.Image()],
    title="Language Model Meets Hugging Face",
    examples=examples,
    description=description,
    article=article,
).launch(debug=True, enable_queue=True)