|
''' |
|
pip install extcolors |
|
''' |
|
|
|
import os |
|
import tensorflow as tf |
|
os.environ['TFHUB_MODEL_LOAD_FORMAT'] = 'COMPRESSED' |
|
import numpy as np |
|
import PIL.Image |
|
import gradio as gr |
|
import tensorflow_hub as hub |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
import gradio as gr |
|
import requests |
|
import io |
|
import random |
|
import os |
|
from PIL import Image, ImageDraw, ImageFont |
|
|
|
from datasets import load_dataset |
|
import pandas as pd |
|
from time import sleep |
|
from tqdm import tqdm |
|
|
|
import extcolors |
|
from gradio_client import Client |
|
|
|
import cv2 |
|
import numpy as np |
|
import glob |
|
import pathlib |
|
|
|
# Hugging Face access token for the Inference API, read from the environment only.
# SECURITY: a literal token used to be assigned here, overriding the environment
# value. Any token that was ever committed must be treated as leaked and revoked.
API_TOKEN = os.environ.get("HF_READ_TOKEN")
|
|
|
''' |
|
dataset = load_dataset("Gustavosta/Stable-Diffusion-Prompts") |
|
prompt_df = dataset["train"].to_pandas() |
|
prompt_df = pd.read_csv("Stable-Diffusion-Prompts.csv") |
|
''' |
|
|
|
|
|
|
|
# Event sentence used as the default prompt; generate_txt2img also falls back
# to it when the prompt it receives is not a string.
DEFAULT_PROMPT = "X go to Istanbul"
# Default role name substituted for "X" in prompts and captions.
DEFAULT_ROLE = "Superman"
# Cover image preloaded into the style-reference input and sorted first by
# get_book_covers().
DEFAULT_BOOK_COVER = "book_cover_dir/Blank.jpg"

# Style-transfer model, loaded once at import time and used as the default
# `hub_module` argument of perform_neural_transfer().
hub_module = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')
|
|
|
def tensor_to_image(tensor):
    """Turn an image tensor into a ``PIL.Image``.

    The values are multiplied by 255 and cast to ``uint8``. An input with more
    than three dimensions must have a leading dimension of size 1, which is
    dropped before the conversion.
    """
    pixels = np.array(tensor * 255, dtype=np.uint8)
    if np.ndim(pixels) > 3:
        # Only a single-image batch can be turned into one picture.
        assert pixels.shape[0] == 1
        pixels = pixels[0]
    return PIL.Image.fromarray(pixels)
|
|
|
|
|
def perform_neural_transfer(content_image_input, style_image_input, hub_module = hub_module):
    """Apply the style of one image to another with the TF-Hub stylization model.

    Args:
        content_image_input: NumPy image array to stylize; it is divided by
            255, so 0-255 pixel values are presumably expected.
        style_image_input: NumPy image array providing the style, same scaling.
        hub_module: Callable taking ``(content, style)`` tensors and returning
            a sequence whose first item is the stylized batch.

    Returns:
        A PIL image of the stylized content, resized back to the width and
        height of ``content_image_input``.
    """
    content_image = content_image_input.astype(np.float32)[np.newaxis, ...] / 255.
    content_image = tf.image.resize(content_image, (400, 600))

    style_image = style_image_input.astype(np.float32)[np.newaxis, ...] / 255.
    style_image = tf.image.resize(style_image, (256, 256))

    outputs = hub_module(tf.constant(content_image), tf.constant(style_image))
    stylized_image = tensor_to_image(outputs[0])

    # Read the target size straight from the input array. Routing the uint8
    # input through tensor_to_image() multiplied it by 255 (wrapping around)
    # and built a throw-away image only to obtain these two numbers.
    height, width = content_image_input.shape[:2]
    return stylized_image.resize((width, height))
|
''' |
|
print("super_resolution_type :") |
|
print(super_resolution_type) |
|
#print(super_resolution_type.value) |
|
|
|
if super_resolution_type not in ["base", "anime"]: |
|
return stylized_image |
|
else: |
|
print("call else :") |
|
stylized_image = inference(stylized_image, super_resolution_type) |
|
return stylized_image |
|
''' |
|
|
|
# Model names offered in the "Current Model" dropdown; each one has a matching
# endpoint branch in generate_txt2img().
list_models = [

    "Pixel-Art-XL",
    "SD-1.5",
    "OpenJourney-V4",
    "Anything-V4",
    "Disney-Pixar-Cartoon",
    "Dalle-3-XL",

]
|
|
|
|
|
|
|
# Hugging Face Inference API endpoint for every supported model name.
_MODEL_API_URLS = {
    "SD-1.5": "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5",
    "OpenJourney-V4": "https://api-inference.huggingface.co/models/prompthero/openjourney",
    "Anything-V4": "https://api-inference.huggingface.co/models/xyn-ai/anything-v4.0",
    "Disney-Pixar-Cartoon": "https://api-inference.huggingface.co/models/stablediffusionapi/disney-pixar-cartoon",
    "Pixel-Art-XL": "https://api-inference.huggingface.co/models/nerijs/pixel-art-xl",
    "Dalle-3-XL": "https://api-inference.huggingface.co/models/openskyml/dalle-3-xl",
}

# image_style -> (text appended to the prompt, text appended to the negative
# prompt). ``None`` means the caller's ``is_negative`` value is sent unchanged.
_IMAGE_STYLE_SUFFIXES = {
    "None style": (", 8k", None),
    "Cinematic": (
        ", realistic, detailed, textured, skin, hair, eyes, by Alex Huguet, Mike Hill, Ian Spriggs, JaeCheol Park, Marek Denko",
        ", abstract, cartoon, stylized",
    ),
    "Digital Art": (
        ", faded , vintage , nostalgic , by Jose Villa , Elizabeth Messina , Ryan Brenizer , Jonas Peterson , Jasmine Star",
        ", sharp , modern , bright",
    ),
    "Portrait": (
        ", soft light, sharp, exposure blend, medium shot, bokeh, (hdr:1.4), high contrast, (cinematic, teal and orange:0.85), (muted colors, dim colors, soothing tones:1.3), low saturation, (hyperdetailed:1.2), (noir:0.4), (natural skin texture, hyperrealism, soft light, sharp:1.2)",
        None,
    ),
}


def generate_txt2img(current_model, prompt, is_negative=False, image_style="None style", steps=50, cfg_scale=7,
                     seed=None, API_TOKEN = API_TOKEN):
    """Generate one image from a text prompt through the HF Inference API.

    Args:
        current_model: Model name; must be a key of ``_MODEL_API_URLS``.
        prompt: Text prompt. Anything that is not a string is replaced by
            ``DEFAULT_PROMPT``.
        is_negative: Negative prompt. For the "Cinematic" and "Digital Art"
            styles a style-specific suffix is appended; a non-string value
            (such as the default ``False``) is treated as empty there.
        image_style: One of the keys of ``_IMAGE_STYLE_SUFFIXES``.
        steps: Sent as ``steps`` in the request payload.
        cfg_scale: Sent as ``cfg_scale`` in the request payload.
        seed: Seed to send; a random one is drawn when ``None``.
        API_TOKEN: Bearer token; the Authorization header is omitted when it
            is empty or ``None``.

    Returns:
        The generated picture as a ``PIL.Image``.

    Raises:
        ValueError: If ``current_model`` or ``image_style`` is unknown.
        requests.HTTPError: If the API answers with an error status.
    """
    # Unknown names used to surface as UnboundLocalError further down.
    if current_model not in _MODEL_API_URLS:
        raise ValueError("Unknown model: {!r}".format(current_model))
    if image_style not in _IMAGE_STYLE_SUFFIXES:
        raise ValueError("Unknown image style: {!r}".format(image_style))
    api_url = _MODEL_API_URLS[current_model]
    prompt_suffix, negative_suffix = _IMAGE_STYLE_SUFFIXES[image_style]

    headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}

    if not isinstance(prompt, str):
        prompt = DEFAULT_PROMPT

    if negative_suffix is None:
        negative = is_negative
    else:
        # `False + "..."` raised TypeError; treat non-strings as "no negative".
        base_negative = is_negative if isinstance(is_negative, str) else ""
        if base_negative:
            negative = base_negative + negative_suffix
        else:
            negative = negative_suffix.lstrip(", ")

    payload = {
        "inputs": prompt + prompt_suffix,
        "is_negative": negative,
        "steps": steps,
        "cfg_scale": cfg_scale,
        "seed": seed if seed is not None else random.randint(-1, 2147483647),
    }

    response = requests.post(api_url, headers=headers, json=payload)
    # Fail with the HTTP error instead of handing an error body to PIL.
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))
|
|
|
from huggingface_hub import InferenceClient |
|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
import os |
|
|
|
# Few-shot conversation for event reasoning: each row is one (User, Assistant)
# exchange. For three sample events the four question types (preconditions,
# effect, motivation, feelings) are answered; the final row is an unanswered
# question and is excluded when `hist` is built from this frame.
event_reasoning_df = pd.DataFrame(
    [['Use the following events as a background to answer questions related to the cause and effect of time.', 'Ok'],

    ['What are the necessary preconditions for the next event?:X had a big meal.', 'X placed an order'],
    ['What could happen after the next event?:X had a big meal.', 'X becomes fat'],
    ['What is the motivation for the next event?:X had a big meal.', 'X is hungry'],
    ['What are your feelings after the following event?:X had a big meal.', "X tastes good"],

    ['What are the necessary preconditions for the next event?:X met his favorite star.', 'X bought a ticket'],
    ['What could happen after the next event?:X met his favorite star.', 'X is motivated'],
    ['What is the motivation for the next event?:X met his favorite star.', 'X wants to have some entertainment'],
    ['What are your feelings after the following event?:X met his favorite star.', "X is in a happy mood"],

    ['What are the necessary preconditions for the next event?: X to cheat', 'X has evil intentions'],
    ['What could happen after the next event?:X to cheat', 'X is accused'],
    ['What is the motivation for the next event?:X to cheat', 'X wants to get something for nothing'],
    ['What are your feelings after the following event?:X to cheat', "X is starving and freezing in prison"],

    ['What could happen after the next event?:X go to Istanbul', ''],
    ],
    columns = ["User", "Assistant"]
)
|
|
|
# Inference client for the instruction-tuned Mistral model; it is the default
# `client` argument of generate().
Mistral_7B_client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.1"
)

# Question prefixes for the four reasoning dimensions. produce_4_event() joins
# each with the event as "<prefix>:<event>" and uses the prefix as result key.
NEED_PREFIX = 'What are the necessary preconditions for the next event?'
EFFECT_PREFIX = 'What could happen after the next event?'
INTENT_PREFIX = 'What is the motivation for the next event?'
REACT_PREFIX = 'What are your feelings after the following event?'
|
|
|
def format_prompt(message, history):
    """Build an ``[INST]``-tagged prompt from a chat history and a new message.

    Args:
        message: The new user message, placed in the final ``[INST]`` block.
        history: Iterable of ``(user_prompt, bot_response)`` pairs that come
            before the new message.

    Returns:
        ``"<s>"`` followed by one ``[INST] user [/INST] answer</s> `` segment
        per history pair and a closing ``[INST] message [/INST]``.
    """
    past_turns = "".join(
        f"[INST] {asked} [/INST] {answered}</s> " for asked, answered in history
    )
    return f"<s>{past_turns}[INST] {message} [/INST]"
|
|
|
def generate(
    prompt, history, client = Mistral_7B_client,
    temperature=0.7, max_new_tokens=256, top_p=0.95, repetition_penalty=1.1,
):
    """Stream a completion for ``prompt`` and yield the text accumulated so far.

    Args:
        prompt: New user message.
        history: ``(user, assistant)`` pairs passed to ``format_prompt``.
        client: Object exposing ``text_generation``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Forwarded to ``text_generation``.
        top_p: Forwarded to ``text_generation`` as a float.
        repetition_penalty: Forwarded to ``text_generation``.

    Yields:
        The output string after each streamed token; the last yielded value is
        the full completion.
    """
    sampling_options = {
        "temperature": max(float(temperature), 1e-2),
        "max_new_tokens": max_new_tokens,
        "top_p": float(top_p),
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,
    }

    token_stream = client.text_generation(
        format_prompt(prompt, history),
        stream=True,
        details=True,
        return_full_text=False,
        **sampling_options,
    )

    text_so_far = ""
    for chunk in token_stream:
        text_so_far = text_so_far + chunk.token.text
        yield text_so_far
    return text_so_far
|
|
|
# Few-shot history as (User, Assistant) tuples; the last row of the frame (the
# unanswered question) is left out. Used as the default `hist` of produce_4_event().
hist = event_reasoning_df.iloc[:-1, :].apply(
    lambda x: (x["User"], x["Assistant"]), axis = 1
)
|
|
|
def produce_4_event(event_fact, hist = hist):
    """Ask the language model the four reasoning questions about an event.

    Args:
        event_fact: Event sentence appended to each question prefix.
        hist: Few-shot history forwarded to ``generate``.

    Returns:
        Dict mapping each of ``NEED_PREFIX``, ``EFFECT_PREFIX``,
        ``INTENT_PREFIX`` and ``REACT_PREFIX`` to the model's final answer with
        any ``"</s>"`` markers removed.
    """
    answers = {}
    for prefix in (NEED_PREFIX, EFFECT_PREFIX, INTENT_PREFIX, REACT_PREFIX):
        question = "{}:{}".format(prefix, event_fact)
        # generate() yields growing partial outputs; the last one is complete.
        partial_outputs = list(generate(question, history = hist, max_new_tokens = 2048))
        answers[prefix] = partial_outputs[-1].replace("</s>", "")
    return answers
|
|
|
def transform_4_event_as_sd_prompts(event_fact ,event_reasoning_dict, role_name = "superman"):
    """Turn an event and its four reasoning answers into captions and SD prompts.

    Args:
        event_fact: The event sentence, with "X" standing for the role.
        event_reasoning_dict: Mapping from the four question prefixes to the
            model's answers (as returned by ``produce_4_event``).
        role_name: Name substituted for every "X". When it is not a non-blank
            string, no substitution is made anywhere.

    Returns:
        ``(caption_list, req_list)``, both ordered as intent, need, event fact,
        react, effect. ``caption_list`` holds the role-substituted texts and
        ``req_list`` the Stable Diffusion prompts generated from them.
    """
    has_role = isinstance(role_name, str) and bool(role_name.strip())

    def with_role(text):
        # One guard for every substitution: previously only the reasoning
        # texts were guarded, so a None role crashed on the event fact and a
        # blank role deleted every "X" from the captions.
        return text.replace("X", role_name) if has_role else text

    def to_sd_prompt(text):
        outputs = list(generate("Transform this as a prompt in stable diffusion: {}".format(text),
                                history = [], max_new_tokens = 2048))
        return outputs[-1].replace("</s>", "")

    req = {k: to_sd_prompt(with_role(v)) for k, v in event_reasoning_dict.items()}
    req["EVENT_FACT"] = to_sd_prompt(with_role(event_fact))

    req_list = [
        req[INTENT_PREFIX], req[NEED_PREFIX],
        req["EVENT_FACT"],
        req[REACT_PREFIX], req[EFFECT_PREFIX]
    ]
    caption_list = [
        with_role(text) for text in (
            event_reasoning_dict[INTENT_PREFIX], event_reasoning_dict[NEED_PREFIX],
            event_fact,
            event_reasoning_dict[REACT_PREFIX], event_reasoning_dict[EFFECT_PREFIX],
        )
    ]
    return caption_list ,req_list
|
|
|
def batch_as_list(input_, batch_size = 3):
    """Split an iterable into consecutive lists of at most ``batch_size`` items.

    Args:
        input_: Iterable of elements to group.
        batch_size: Maximum length of each group.

    Returns:
        List of lists in the original order; only the last group may be
        shorter than ``batch_size``.
    """
    batches = []
    for item in input_:
        if batches and len(batches[-1]) < batch_size:
            # The newest batch still has room.
            batches[-1].append(item)
        else:
            batches.append([item])
    return batches
|
|
|
def add_margin(pil_img, top, right, bottom, left, color):
    """Return a copy of ``pil_img`` surrounded by margins filled with ``color``.

    Args:
        pil_img: Source PIL image.
        top, right, bottom, left: Margin sizes in pixels for each side.
        color: Fill colour for the new canvas.

    Returns:
        A new image in the same mode, enlarged by the margins, with the source
        pasted at ``(left, top)``.
    """
    src_width, src_height = pil_img.size
    canvas_size = (src_width + right + left, src_height + top + bottom)
    canvas = Image.new(pil_img.mode, canvas_size, color)
    canvas.paste(pil_img, (left, top))
    return canvas
|
|
|
def add_caption_on_image(input_image, caption, marg_ratio = 0.15, row_token_num = 6):
    """Add a top/left margin in the image's dominant colour and draw a caption.

    Args:
        input_image: PIL image to caption.
        caption: Caption text; it is split on spaces and wrapped into rows.
        marg_ratio: Margin size as a fraction of the image's longer side.
        row_token_num: Maximum number of words per caption row.

    Returns:
        A new PIL image containing the margin and the caption in black text.
    """
    assert hasattr(input_image, "save")
    marg_size = int(marg_ratio * max(input_image.size))
    font_size = int(marg_size / 4)

    # The first colour returned by extcolors fills the margin.
    colors, _pixel_count = extcolors.extract_from_image(input_image)
    input_image = add_margin(input_image, marg_size, 0, 0, marg_size, colors[0][0])

    try:
        font = ImageFont.truetype("DejaVuSerif-Italic.ttf", font_size)
    except OSError:
        # The font file is not available on this machine; use PIL's built-in
        # font instead of failing the whole video.
        font = ImageFont.load_default()

    # Drop empty tokens produced by repeated spaces so that rows are not
    # padded with stray blanks.
    tokens = [token for token in (piece.strip() for piece in caption.split(" ")) if token]
    draw = ImageDraw.Draw(input_image)
    for line_num, row_tokens in enumerate(batch_as_list(tokens, row_token_num)):
        # Both coordinates grow with the row number, as in the original layout.
        offset = font_size * (line_num + 1) * 1.1
        draw.text((offset, offset), " ".join(row_tokens), fill="black", font = font)

    return input_image
|
|
|
|
|
def expand2square(pil_img, background_color):
    """Pad an image to a square, centring it on ``background_color``.

    Args:
        pil_img: Source PIL image.
        background_color: Fill colour for the padding.

    Returns:
        ``pil_img`` itself when it is already square; otherwise a new square
        image whose side equals the longer side of the source, with the source
        centred on it.
    """
    width, height = pil_img.size
    if width == height:
        return pil_img
    side = max(width, height)
    result = Image.new(pil_img.mode, (side, side), background_color)
    # paste() needs an (x, y) tuple. The portrait branch used to pass a bare
    # int, which raised, and the landscape branch pushed the picture to the
    # bottom edge instead of centring it.
    result.paste(pil_img, ((side - width) // 2, (side - height) // 2))
    return result
|
|
|
def generate_video(images, video_name = 'ppt.avi'):
    """Write PIL images to a video file, one image per frame at 1 frame/second.

    Args:
        images: Iterable of PIL images; the first image defines the frame size.
        video_name: Output path of the video.

    Raises:
        ValueError: If ``images`` is empty.
    """
    import cv2

    # Convert in memory (PIL RGB -> OpenCV BGR) rather than round-tripping
    # every frame through a temporary PNG in the working directory, which left
    # files behind whenever a later step failed.
    frames = [
        cv2.cvtColor(np.asarray(im.convert("RGB")), cv2.COLOR_RGB2BGR)
        for im in images
    ]
    if not frames:
        raise ValueError("generate_video requires at least one image")

    height, width = frames[0].shape[:2]
    video = cv2.VideoWriter(video_name, 0, 1, (width, height))
    try:
        for frame in frames:
            video.write(frame)
    finally:
        # Always finalise the file, even if writing a frame fails.
        video.release()
|
|
|
def make_video_from_image_list(image_list, video_name = "ppt.avi"):
    """Square and resize the images to a common size, then write them as a video.

    Args:
        image_list: Sequence of PIL images.
        video_name: Output path; an existing file at that path is removed first.

    Returns:
        ``video_name``.
    """
    if os.path.exists(video_name):
        os.remove(video_name)
    assert all(hasattr(im, "save") for im in image_list)

    # Largest width and largest height over all images; the bigger of the two
    # becomes the side of every frame.
    per_axis_max = [max(axis_values) for axis_values in zip(*(im.size for im in image_list))]
    side = max(per_axis_max)

    frames = []
    for im in image_list:
        # Pad with the first colour reported by extcolors.
        padding_rgb = extcolors.extract_from_image(im)[0][0][0]
        frames.append(expand2square(im, padding_rgb).resize((side, side)))

    generate_video(frames, video_name = video_name)
    return video_name
|
|
|
''' |
|
style_transfer_client = Client("https://svjack-super-resolution-neural-style-transfer.hf.space") |
|
def style_transfer_func(content_img, style_img, style_transfer_client = style_transfer_client): |
|
from uuid import uuid1 |
|
assert hasattr(content_img, "save") |
|
assert hasattr(style_img, "save") |
|
content_im_name = "{}.png".format(uuid1()) |
|
style_im_name = "{}.png".format(uuid1()) |
|
content_img.save(content_im_name) |
|
style_img.save(style_im_name) |
|
out = style_transfer_client.predict( |
|
content_im_name, |
|
style_im_name, |
|
"none", |
|
fn_index=1 |
|
) |
|
os.remove(content_im_name) |
|
os.remove(style_im_name) |
|
return Image.open(out) |
|
''' |
|
def style_transfer_func(content_img, style_img):
    """Stylize ``content_img`` with ``style_img`` unless the style is blank.

    Args:
        content_img: PIL image to stylize.
        style_img: PIL image providing the style.

    Returns:
        ``content_img`` unchanged when the first extracted colour of
        ``style_img`` is pure white and covers more than 95% of the counted
        pixels; otherwise the stylized PIL image.
    """
    assert hasattr(content_img, "save")
    assert hasattr(style_img, "save")

    colors, _pixel_count = extcolors.extract_from_image(style_img)
    if colors and colors[0][0] == (255, 255, 255):
        counted_pixels = sum(entry[1] for entry in colors)
        if colors[0][1] / counted_pixels > 0.95:
            # An (almost) entirely white style image carries no style.
            return content_img

    stylized = perform_neural_transfer(np.asarray(content_img), np.asarray(style_img))
    assert hasattr(stylized, "save")
    return stylized
|
|
|
|
|
def gen_images_from_event_fact(current_model, event_fact = DEFAULT_PROMPT, role_name = DEFAULT_ROLE,
                               style_pic = None
                               ):
    """Build the story video for one event.

    The event is expanded into four reasoning statements, each statement (and
    the event itself) is rendered as an image, optionally restyled with
    ``style_pic``, captioned, and the frames are written to a video with the
    event-fact frame first.

    Args:
        current_model: Model name passed to ``generate_txt2img``.
        event_fact: Event sentence in the form "X do something".
        role_name: Name substituted for "X".
        style_pic: Optional NumPy image used as style reference.

    Returns:
        Path of the generated video file.
    """
    event_reasoning_dict = produce_4_event(event_fact)
    caption_list ,event_reasoning_sd_list = transform_4_event_as_sd_prompts(event_fact ,
                                                                            event_reasoning_dict,
                                                                            role_name = role_name
                                                                            )

    # Keep every image together with its caption and original position so that
    # dropping an unusable result cannot shift captions onto the wrong image.
    frames = []
    for position, (prompt, caption) in enumerate(tqdm(list(zip(event_reasoning_sd_list, caption_list)))):
        im = generate_txt2img(current_model, prompt, is_negative=False, image_style="None style")
        sleep(2)
        if hasattr(im, "save"):
            frames.append((position, im, caption))

    if style_pic is not None and hasattr(style_pic, "size"):
        style_pic = Image.fromarray(style_pic.astype(np.uint8))
        print("perform styling.....")
        frames = [
            (position, style_transfer_func(im, style_pic), caption)
            for position, im, caption in tqdm(frames)
        ]

    # Position 2 is the event-fact image. A stable sort moves it to the front
    # and keeps the others in order. The previous `ele not in list` test
    # compared images by content, which is slow and silently dropped
    # identical frames.
    frames.sort(key=lambda frame: frame[0] != 2)
    img_list_reordered = [add_caption_on_image(im, caption) for _, im, caption in frames]

    video_path = make_video_from_image_list(img_list_reordered)
    return video_path
|
|
|
def image_click(images, evt: gr.SelectData,
                ):
    """Return the file name of the gallery entry that was clicked.

    Args:
        images: Current gallery value, indexed by ``evt.index``.
        evt: Selection event carrying the clicked index.

    Returns:
        The ``"name"`` field of the first element of the selected entry.
    """
    # NOTE(review): presumably each entry is a (file-info dict, caption) pair;
    # confirm against the installed gradio version.
    selected_entry = images[evt.index]
    file_info = selected_entry[0]
    return file_info["name"]
|
|
|
def get_book_covers(cover_dir = "book_cover_dir", default_cover = None):
    """Collect the book-cover images available as style references.

    Args:
        cover_dir: Directory searched recursively for ``.jpg``, ``.png`` and
            ``.jpeg`` files.
        default_cover: Path fragment identifying the cover that is listed
            first; ``DEFAULT_BOOK_COVER`` is used when ``None``.

    Returns:
        List of ``(path, label)`` tuples, where ``label`` is the file name
        without its extension. Files whose name starts with "_" are skipped
        and covers matching ``default_cover`` come first.
    """
    if default_cover is None:
        default_cover = DEFAULT_BOOK_COVER

    root = pathlib.Path(cover_dir)
    # pathlib's name/stem replace manual "/" and "." splitting, which broke on
    # non-POSIX separators and removed interior dots from labels.
    covers = [
        (str(path), path.stem)
        for pattern in ("*.jpg", "*.png", "*.jpeg")
        for path in root.rglob(pattern)
        if not path.name.startswith("_")
    ]
    # Stable sort: matching covers first, everything else keeps its order.
    covers.sort(
        key=lambda cover: default_cover in pathlib.Path(cover[0]).as_posix(),
        reverse=True,
    )
    return covers
|
|
|
# Gradio UI: model and cover selection on top, prompt inputs in the middle,
# the generated story video at the bottom.
with gr.Blocks(css=".caption-label {display:none}") as demo:
    # NOTE(review): `favicon` is assigned but not referenced anywhere below.
    favicon = '<img src="" width="48px" style="display: inline">'
    gr.Markdown(
        f"""<h1><center> 🎥💬 Comet Atomic Story Teller</center></h1>
        """
    )
    with gr.Row():
        with gr.Column(elem_id="prompt-container"):
            current_model = gr.Dropdown(label="Current Model", choices=list_models, value="Pixel-Art-XL")
            # Gallery of bundled covers; clicking one copies it to the image input.
            style_reference_input_gallery = gr.Gallery(get_book_covers(),

                                                        height = 512,
                                                        label = "StoryBook Cover (click to use)")
        with gr.Column(elem_id="prompt-container"):

            # Style reference actually passed to the generator.
            style_reference_input_image = gr.Image(
                label = "StoryBook Cover (you can upload yourself or click from left gallery)",

                value = DEFAULT_BOOK_COVER,
                interactive = True,
            )
            '''
            super_resolution_type = gr.Radio(choices = ["SD(Standard Definition)" ,"HD(High Definition)"],
            value="SD(Standard Definition)", label="Story Video Quality",
            interactive = True)
            '''

    with gr.Row():
        text_prompt = gr.Textbox(label="Event Prompt", placeholder=DEFAULT_PROMPT,
                                 lines=1, elem_id="prompt-text-input", value = DEFAULT_PROMPT,
                                 info = "You should set the prompt in format 'X do something', X is the role in the right."
                                 )
        role_name = gr.Textbox(label="Role (X)", placeholder=DEFAULT_ROLE, lines=1,
                               elem_id="prompt-text-input", value = DEFAULT_ROLE,
                               info = "You should set the Role (X) with some famous man (like: Confucius Superman)"
                               )
        text_button = gr.Button("Generate", variant='primary', elem_id="gen-button")

    with gr.Row():

        video_output = gr.Video(label = "Story Video", elem_id="gallery")

    # Clicking a gallery cover loads it into the style-reference image input.
    style_reference_input_gallery.select(
        image_click, style_reference_input_gallery, style_reference_input_image
    )

    # "Generate" runs the whole pipeline and shows the resulting video.
    text_button.click(gen_images_from_event_fact, inputs=[current_model, text_prompt, role_name, style_reference_input_image],
                      outputs=video_output)

# Start the app as soon as the module is executed.
demo.launch(show_api=False)
|
|