''' pip install extcolors '''
import os
import io
import glob
import random
import pathlib
import requests
import numpy as np
import pandas as pd
import cv2
import PIL.Image
import gradio as gr
import tensorflow as tf

# Load TF-Hub models from the compressed archive format.
os.environ['TFHUB_MODEL_LOAD_FORMAT'] = 'COMPRESSED'

import tensorflow_hub as hub
import matplotlib.pyplot as plt
import extcolors
from PIL import Image, ImageDraw, ImageFont
from datasets import load_dataset
from gradio_client import Client
from time import sleep
from tqdm import tqdm
from real_esrgan_app import *  # provides inference() used for Real-ESRGAN upscaling

API_TOKEN = os.environ.get("HF_READ_TOKEN")

'''
dataset = load_dataset("Gustavosta/Stable-Diffusion-Prompts")
prompt_df = dataset["train"].to_pandas()
prompt_df = pd.read_csv("Stable-Diffusion-Prompts.csv")
'''

#DEFAULT_MODEL = "stabilityai/stable-diffusion-2-1"
#DEFAULT_PROMPT = "1girl, aqua eyes, baseball cap, blonde hair, closed mouth, earrings, green background, hat, hoop earrings, jewelry, looking at viewer, shirt, short hair, simple background, solo, upper body, yellow shirt"
DEFAULT_PROMPT = "X go to Istanbul"
DEFAULT_ROLE = "Superman"
DEFAULT_BOOK_COVER = "book_cover_dir/Blank.jpg"

# Magenta arbitrary-image-stylization model used for neural style transfer.
hub_module = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')


def tensor_to_image(tensor):
    # Convert a float tensor in [0, 1] (optionally batched) into a PIL image.
    tensor = tensor * 255
    tensor = np.array(tensor, dtype=np.uint8)
    if np.ndim(tensor) > 3:
        assert tensor.shape[0] == 1
        tensor = tensor[0]
    return PIL.Image.fromarray(tensor)


def perform_neural_transfer(content_image_input, style_image_input, super_resolution_type, hub_module=hub_module):
    # Normalize and resize the content/style arrays to the sizes expected by the TF-Hub module.
    content_image = content_image_input.astype(np.float32)[np.newaxis, ...] / 255.
    content_image = tf.image.resize(content_image, (400, 600))

    #style_image_input = style_urls[style_image_input]
    #style_image_input = plt.imread(style_image_input)
    style_image = style_image_input.astype(np.float32)[np.newaxis, ...] / 255.
    style_image = tf.image.resize(style_image, (256, 256))

    # Run the style-transfer module and convert the output back to a PIL image
    # at the original content resolution.
    outputs = hub_module(tf.constant(content_image), tf.constant(style_image))
    stylized_image = outputs[0]
    stylized_image = tensor_to_image(stylized_image)
    content_image_input = tensor_to_image(content_image_input)
    stylized_image = stylized_image.resize(content_image_input.size)

    print("super_resolution_type :")
    print(super_resolution_type)
    #print(super_resolution_type.value)

    if super_resolution_type not in ["base", "anime"]:
        return stylized_image
    else:
        # Upscale with Real-ESRGAN ("base" or "anime" weights).
        print("call else :")
        stylized_image = inference(stylized_image, super_resolution_type)
        return stylized_image


list_models = [
    #"SDXL-1.0",
    "Pixel-Art-XL",
    "SD-1.5",
    "OpenJourney-V4",
    "Anything-V4",
    "Disney-Pixar-Cartoon",
    "Dalle-3-XL",
    #"Midjourney-V4-XL",
]

#list_prompts = get_samples()

# Hugging Face Inference API endpoints for the models above
# (the commented entries match the commented entries in list_models).
MODEL_API_URLS = {
    "SD-1.5": "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5",
    #"SDXL-1.0": "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0",
    "OpenJourney-V4": "https://api-inference.huggingface.co/models/prompthero/openjourney",
    "Anything-V4": "https://api-inference.huggingface.co/models/xyn-ai/anything-v4.0",
    "Disney-Pixar-Cartoon": "https://api-inference.huggingface.co/models/stablediffusionapi/disney-pixar-cartoon",
    "Pixel-Art-XL": "https://api-inference.huggingface.co/models/nerijs/pixel-art-xl",
    "Dalle-3-XL": "https://api-inference.huggingface.co/models/openskyml/dalle-3-xl",
    #"Midjourney-V4-XL": "https://api-inference.huggingface.co/models/openskyml/midjourney-v4-xl",
}


def generate_txt2img(current_model, prompt, is_negative=False, image_style="None style",
                     steps=50, cfg_scale=7, seed=None, API_TOKEN=API_TOKEN):
    # Call the Hugging Face Inference API for the selected model and return a PIL image.
    API_URL = MODEL_API_URLS[current_model]

    #API_TOKEN = os.environ.get("HF_READ_TOKEN")
    headers = {"Authorization": f"Bearer {API_TOKEN}"}

    if not isinstance(prompt, str):
        prompt = DEFAULT_PROMPT

    if image_style == "None style":
        payload = {
            "inputs": prompt + ", 8k",
            "is_negative": is_negative,
            "steps": steps,
            "cfg_scale": cfg_scale,
            "seed": seed if seed is not None else random.randint(-1, 2147483647),
        }
    elif image_style == "Cinematic":
        payload = {
            "inputs": prompt + ", realistic, detailed, textured, skin, hair, eyes, by Alex Huguet, Mike Hill, Ian Spriggs, JaeCheol Park, Marek Denko",
            "is_negative": is_negative + ", abstract, cartoon, stylized",
            "steps": steps,
            "cfg_scale": cfg_scale,
            "seed": seed if seed is not None else random.randint(-1, 2147483647),
        }
    elif image_style == "Digital Art":
        payload = {
            "inputs": prompt + ", faded , vintage , nostalgic , by Jose Villa , Elizabeth Messina , Ryan Brenizer , Jonas Peterson , Jasmine Star",
"is_negative": is_negative + ", sharp , modern , bright", "steps": steps, "cfg_scale": cfg_scale, "seed": seed if seed is not None else random.randint(-1, 2147483647) } elif image_style == "Portrait": payload = { "inputs": prompt + ", soft light, sharp, exposure blend, medium shot, bokeh, (hdr:1.4), high contrast, (cinematic, teal and orange:0.85), (muted colors, dim colors, soothing tones:1.3), low saturation, (hyperdetailed:1.2), (noir:0.4), (natural skin texture, hyperrealism, soft light, sharp:1.2)", "is_negative": is_negative, "steps": steps, "cfg_scale": cfg_scale, "seed": seed if seed is not None else random.randint(-1, 2147483647) } image_bytes = requests.post(API_URL, headers=headers, json=payload).content image = Image.open(io.BytesIO(image_bytes)) return image from huggingface_hub import InferenceClient import gradio as gr import pandas as pd import numpy as np import os event_reasoning_df = pd.DataFrame( [['Use the following events as a background to answer questions related to the cause and effect of time.', 'Ok'], ['What are the necessary preconditions for the next event?:X had a big meal.', 'X placed an order'], ['What could happen after the next event?:X had a big meal.', 'X becomes fat'], ['What is the motivation for the next event?:X had a big meal.', 'X is hungry'], ['What are your feelings after the following event?:X had a big meal.', "X tastes good"], ['What are the necessary preconditions for the next event?:X met his favorite star.', 'X bought a ticket'], ['What could happen after the next event?:X met his favorite star.', 'X is motivated'], ['What is the motivation for the next event?:X met his favorite star.', 'X wants to have some entertainment'], ['What are your feelings after the following event?:X met his favorite star.', "X is in a happy mood"], ['What are the necessary preconditions for the next event?: X to cheat', 'X has evil intentions'], ['What could happen after the next event?:X to cheat', 'X is accused'], ['What is the motivation for the next event?:X to cheat', 'X wants to get something for nothing'], ['What are your feelings after the following event?:X to cheat', "X is starving and freezing in prison"], ['What could happen after the next event?:X go to Istanbul', ''], ], columns = ["User", "Assistant"] ) Mistral_7B_client = InferenceClient( "mistralai/Mistral-7B-Instruct-v0.1" ) NEED_PREFIX = 'What are the necessary preconditions for the next event?' EFFECT_PREFIX = 'What could happen after the next event?' INTENT_PREFIX = 'What is the motivation for the next event?' REACT_PREFIX = 'What are your feelings after the following event?' 
def format_prompt(message, history):
    # Build a Mistral-instruct prompt from (user, assistant) history pairs.
    prompt = ""
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response} "
    prompt += f"[INST] {message} [/INST]"
    return prompt


def generate(
    prompt, history, client=Mistral_7B_client,
    temperature=0.7, max_new_tokens=256, top_p=0.95, repetition_penalty=1.1,
):
    # Stream a completion from the Inference API, yielding the accumulated text
    # after each token.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)

    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True,
                                    details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
    return output


# Few-shot chat history (all rows except the held-out query in the last row).
hist = event_reasoning_df.iloc[:-1, :].apply(
    lambda x: (x["User"], x["Assistant"]), axis=1
)


def produce_4_event(event_fact, hist=hist):
    # Ask the model for the four commonsense relations of an event:
    # preconditions, effects, motivation and emotional reaction.
    NEED_PREFIX_prompt = "{}:{}".format(NEED_PREFIX, event_fact)
    EFFECT_PREFIX_prompt = "{}:{}".format(EFFECT_PREFIX, event_fact)
    INTENT_PREFIX_prompt = "{}:{}".format(INTENT_PREFIX, event_fact)
    REACT_PREFIX_prompt = "{}:{}".format(REACT_PREFIX, event_fact)

    NEED_PREFIX_output = list(generate(NEED_PREFIX_prompt, history=hist, max_new_tokens=2048))[-1]
    EFFECT_PREFIX_output = list(generate(EFFECT_PREFIX_prompt, history=hist, max_new_tokens=2048))[-1]
    INTENT_PREFIX_output = list(generate(INTENT_PREFIX_prompt, history=hist, max_new_tokens=2048))[-1]
    REACT_PREFIX_output = list(generate(REACT_PREFIX_prompt, history=hist, max_new_tokens=2048))[-1]

    # Strip the model's end-of-sequence token from each answer.
    NEED_PREFIX_output, EFFECT_PREFIX_output, INTENT_PREFIX_output, REACT_PREFIX_output = map(
        lambda x: x.replace("</s>", ""),
        [NEED_PREFIX_output, EFFECT_PREFIX_output, INTENT_PREFIX_output, REACT_PREFIX_output]
    )

    return {
        NEED_PREFIX: NEED_PREFIX_output,
        EFFECT_PREFIX: EFFECT_PREFIX_output,
        INTENT_PREFIX: INTENT_PREFIX_output,
        REACT_PREFIX: REACT_PREFIX_output,
    }


def transform_4_event_as_sd_prompts(event_fact, event_reasoning_dict, role_name="superman"):
    # Rewrite each event sentence as a Stable Diffusion prompt, and keep the plain
    # sentences (with "X" replaced by the role name) as captions.
    req = {}
    for k, v in event_reasoning_dict.items():
        if isinstance(role_name, str) and role_name.strip():
            v_ = v.replace("X", role_name)
        else:
            v_ = v
        req[k] = list(generate("Transform this as a prompt in stable diffusion: {}".format(v_),
                               history=[], max_new_tokens=2048))[-1].replace("</s>", "")

    event_fact_ = event_fact.replace("X", role_name)
    req["EVENT_FACT"] = list(generate("Transform this as a prompt in stable diffusion: {}".format(event_fact_),
                                      history=[], max_new_tokens=2048))[-1].replace("</s>", "")

    # Order: motivation, precondition, the event itself, reaction, effect.
    req_list = [
        req[INTENT_PREFIX],
        req[NEED_PREFIX],
        req["EVENT_FACT"],
        req[REACT_PREFIX],
        req[EFFECT_PREFIX],
    ]
    caption_list = [
        event_reasoning_dict[INTENT_PREFIX],
        event_reasoning_dict[NEED_PREFIX],
        event_fact,
        event_reasoning_dict[REACT_PREFIX],
        event_reasoning_dict[EFFECT_PREFIX],
    ]
    caption_list = list(map(lambda x: x.replace("X", role_name), caption_list))
    return caption_list, req_list


def batch_as_list(input_, batch_size=3):
    # Split a sequence into consecutive chunks of at most batch_size items.
    req = []
    for ele in input_:
        if not req or len(req[-1]) >= batch_size:
            req.append([ele])
        else:
            req[-1].append(ele)
    return req


def add_margin(pil_img, top, right, bottom, left, color):
    # Pad a PIL image with a solid-colored margin on each side.
    width, height = pil_img.size
    new_width = width + right + left
    new_height = height + top + bottom
    result = Image.new(pil_img.mode, (new_width, new_height), color)
    result.paste(pil_img, (left, top))
    return result


def add_caption_on_image(input_image, caption, marg_ratio=0.15, row_token_num=6):
    # Add a margin in the image's dominant color and draw the caption onto it,
    # wrapped to row_token_num words per line.
    from uuid import uuid1
    assert hasattr(input_image, "save")

    max_image_size = max(input_image.size)
    marg_size = int(marg_ratio * max_image_size)
    colors, pixel_count = extcolors.extract_from_image(input_image)
    input_image = add_margin(input_image, marg_size, 0, 0, marg_size, colors[0][0])

    '''
    tmp_name = "{}.png".format(uuid1())
    input_image.save(tmp_name)
    ImageCaptioner.add_captions(tmp_name, caption,
        overwrite = 1,
        size = int(marg_size / 4),
        align = "TOP_LEFT",
        output = tmp_name,
        color = "black",
    )
    output_image = Image.open(tmp_name)
    os.remove(tmp_name)
    '''

    font = ImageFont.truetype("DejaVuSerif-Italic.ttf", int(marg_size / 4))
    caption_token_list = list(map(lambda x: x.strip(), caption.split(" ")))
    caption_list = list(map(" ".join, batch_as_list(caption_token_list, row_token_num)))

    draw = ImageDraw.Draw(input_image)
    for line_num, line_caption in enumerate(caption_list):
        position = (int(marg_size / 4) * (line_num + 1) * 1.1,
                    (int(marg_size / 4) * ((line_num + 1) * 1.1)))
        draw.text(position, line_caption, fill="black", font=font)
    return input_image


def expand2square(pil_img, background_color):
    # Place the image on a square canvas filled with background_color.
    width, height = pil_img.size
    if width == height:
        return pil_img
    elif width > height:
        result = Image.new(pil_img.mode, (width, width), background_color)
        result.paste(pil_img, (0, (width - height)))
        return result
    else:
        result = Image.new(pil_img.mode, (height, height), background_color)
        result.paste(pil_img, ((height - width), 0))
        return result


def generate_video(images, video_name='ppt.avi'):
    import cv2
    from uuid import uuid1

    # Write each PIL image to a temporary file so cv2 can read it back.
    im_names = []
    for im in images:
        name = "{}.png".format(uuid1())
        im.save(name)
        im_names.append(name)

    # The frame size is taken from the first image.
    frame = cv2.imread(im_names[0])
    height, width, layers = frame.shape
    video = cv2.VideoWriter(video_name, 0, 1, (width, height))

    # Append the images to the video one by one, deleting the temporary files.
    for name in im_names:
        video.write(cv2.imread(name))
        os.remove(name)

    # Deallocate any windows and release the generated video.
    cv2.destroyAllWindows()
    video.release()


def make_video_from_image_list(image_list, video_name="ppt.avi"):
    if os.path.exists(video_name):
        os.remove(video_name)
    assert all(map(lambda x: hasattr(x, "save"), image_list))

    # Pad every frame to the same square size, filling with its dominant color.
    max_size = list(map(max, zip(*map(lambda x: x.size, image_list))))
    max_size = max(max_size)
    image_list = list(map(lambda x: expand2square(x,
        extcolors.extract_from_image(x)[0][0][0]
    ).resize((max_size, max_size)), image_list))

    generate_video(image_list, video_name=video_name)
    return video_name


'''
style_transfer_client = Client("https://svjack-super-resolution-neural-style-transfer.hf.space")

def style_transfer_func(content_img, style_img, style_transfer_client = style_transfer_client):
    from uuid import uuid1
    assert hasattr(content_img, "save")
    assert hasattr(style_img, "save")
    content_im_name = "{}.png".format(uuid1())
    style_im_name = "{}.png".format(uuid1())
    content_img.save(content_im_name)
    style_img.save(style_im_name)
    out = style_transfer_client.predict(
        content_im_name,
        style_im_name,
        "none",
        fn_index=1
    )
    os.remove(content_im_name)
    os.remove(style_im_name)
    return Image.open(out)
'''

def style_transfer_func(content_img, style_img, super_resolution_type="none"):
    # Apply the local TF-Hub style transfer (and optional Real-ESRGAN upscaling)
    # to a pair of PIL images.
    assert hasattr(content_img, "save")
    assert hasattr(style_img, "save")
    content_image_input = np.asarray(content_img)
    style_image_input = np.asarray(style_img)
    out = perform_neural_transfer(content_image_input, style_image_input,
                                  super_resolution_type=super_resolution_type)
    assert hasattr(out, "save")
    return out
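
# Illustrative usage sketch (an assumption, not executed by the app; the file names
# are hypothetical): given one generated frame and a cover image as PIL images,
# a single story frame and video would be produced with the helpers above.
#
#   frame = Image.open("frame.png")
#   cover = Image.open("book_cover_dir/Blank.jpg")
#   styled = style_transfer_func(frame, cover, super_resolution_type="none")
#   captioned = add_caption_on_image(styled, "Superman goes to Istanbul")
#   make_video_from_image_list([captioned], video_name="ppt.avi")
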

def gen_images_from_event_fact(current_model,
                               event_fact=DEFAULT_PROMPT,
                               role_name=DEFAULT_ROLE,
                               style_pic=None,
                               super_resolution_type="SD(Standard Definition)"):
    # Full pipeline: event reasoning -> SD prompts -> txt2img frames ->
    # optional style transfer -> captions -> story video.
    assert super_resolution_type in ["SD(Standard Definition)", "HD(High Definition)"]

    event_reasoning_dict = produce_4_event(event_fact)
    caption_list, event_reasoning_sd_list = transform_4_event_as_sd_prompts(
        event_fact,
        event_reasoning_dict,
        role_name=role_name
    )

    img_list = []
    for prompt in tqdm(event_reasoning_sd_list):
        im = generate_txt2img(current_model, prompt, is_negative=False, image_style="None style")
        img_list.append(im)
        sleep(2)
    img_list = list(filter(lambda x: hasattr(x, "save"), img_list))

    if style_pic is not None and hasattr(style_pic, "size"):
        style_pic = Image.fromarray(style_pic.astype(np.uint8))
        print("perform styling.....")
        img_list_ = []
        for x in tqdm(img_list):
            if super_resolution_type == "SD(Standard Definition)":
                img_list_.append(
                    style_transfer_func(x, style_pic, super_resolution_type="none")
                )
            else:
                img_list_.append(
                    style_transfer_func(x, style_pic, super_resolution_type="anime")
                )
        img_list = img_list_

    img_list = list(map(lambda t2: add_caption_on_image(t2[0], t2[1]),
                        zip(*[img_list, caption_list])))

    # Put the main event frame (index 2) first, followed by the remaining frames in order.
    img_mid = img_list[2]
    img_list_reordered = [img_mid]
    for ele in img_list:
        if ele not in img_list_reordered:
            img_list_reordered.append(ele)

    video_path = make_video_from_image_list(img_list_reordered)
    return video_path


def image_click(images, evt: gr.SelectData):
    # Return the file path of the gallery image the user clicked.
    #print(images)
    #print(evt.index)
    img_selected = images[evt.index][0]["name"]
    #print(img_selected)
    return img_selected


def get_book_covers():
    # Collect (path, name) pairs for cover images in book_cover_dir, skipping files
    # whose names start with "_", and list the default blank cover first.
    covers = pd.Series(
        list(pathlib.Path("book_cover_dir").rglob("*.jpg")) + \
        list(pathlib.Path("book_cover_dir").rglob("*.png")) + \
        list(pathlib.Path("book_cover_dir").rglob("*.jpeg"))
    ).map(str).map(lambda x: np.nan if x.split("/")[-1].startswith("_") else x).dropna().map(
        lambda x: (x, "".join(x.split(".")[:-1]).split("/")[-1])
    ).values.tolist()
    covers = sorted(covers, key=lambda t2: int(DEFAULT_BOOK_COVER in t2[0]), reverse=True)
    return covers


with gr.Blocks(css=".caption-label {display:none}") as demo:
    favicon = ''
    gr.Markdown(
        f"""

🌻{favicon} AI Diffusion

""" ) with gr.Row(): with gr.Column(elem_id="prompt-container"): current_model = gr.Dropdown(label="Current Model", choices=list_models, value="Pixel-Art-XL") style_reference_input_gallery = gr.Gallery(get_book_covers(), #width = 512, height = 512, label = "StoryBook Cover (click to use)") with gr.Column(elem_id="prompt-container"): #with gr.Row(elem_id="prompt-container"): style_reference_input_image = gr.Image( label = "StoryBook Cover (you can upload yourself or click from left gallery)", #width = 512, value = DEFAULT_BOOK_COVER, interactive = True, ) super_resolution_type = gr.Radio(choices = ["SD(Standard Definition)" ,"HD(High Definition)"], value="SD(Standard Definition)", label="Story Video Quality", interactive = True) with gr.Row(): text_prompt = gr.Textbox(label="Prompt", placeholder="a cute dog", lines=1, elem_id="prompt-text-input", value = DEFAULT_PROMPT) role_name = gr.Textbox(label="Role", placeholder=DEFAULT_ROLE, lines=1, elem_id="prompt-text-input", value = DEFAULT_ROLE) text_button = gr.Button("Generate", variant='primary', elem_id="gen-button") with gr.Row(): #image_output = gr.Image(type="pil", label="Output Image", elem_id="gallery") #image_output = gr.Gallery(label="Output Images", elem_id="gallery") video_output = gr.Video(label = "Story Video", elem_id="gallery") #text_button.click(generate_txt2img, inputs=[current_model, text_prompt, negative_prompt, image_style], outputs=image_output) style_reference_input_gallery.select( image_click, style_reference_input_gallery, style_reference_input_image ) text_button.click(gen_images_from_event_fact, inputs=[current_model, text_prompt, role_name, style_reference_input_image, super_resolution_type], outputs=video_output) #select_button.click(generate_txt2img, inputs=[current_model, select_prompt, negative_prompt, image_style], outputs=image_output) #demo.load(get_params, None, select_prompt) demo.launch(show_api=False)