Comet-Atomic-Story-Teller

Running

App Files Files Community

Comet-Atomic-Story-Teller / app.py

svjack

Update app.py

6f6f4a0 8 months ago

raw

history blame

No virus

22.7 kB

	'''
	pip install extcolors
	'''

	import os
	import tensorflow as tf
	os.environ['TFHUB_MODEL_LOAD_FORMAT'] = 'COMPRESSED'
	import numpy as np
	import PIL.Image
	import gradio as gr
	import tensorflow_hub as hub
	import matplotlib.pyplot as plt
	#from real_esrgan_app import *

	import gradio as gr
	import requests
	import io
	import random
	import os
	from PIL import Image, ImageDraw, ImageFont

	from datasets import load_dataset
	import pandas as pd
	from time import sleep
	from tqdm import tqdm

	import extcolors
	from gradio_client import Client

	import cv2
	import numpy as np
	import glob
	import pathlib

	# Hugging Face access token used for the hosted inference API.
	# It is read from the environment only. This file is published, so a token
	# written here as a literal is leaked to every reader and must be revoked.
	API_TOKEN = os.environ.get("HF_READ_TOKEN")

	'''
	dataset = load_dataset("Gustavosta/Stable-Diffusion-Prompts")
	prompt_df = dataset["train"].to_pandas()
	prompt_df = pd.read_csv("Stable-Diffusion-Prompts.csv")
	'''

	#DEFAULT_MODEL = "stabilityai/stable-diffusion-2-1"
	#DEFAULT_PROMPT = "1girl, aqua eyes, baseball cap, blonde hair, closed mouth, earrings, green background, hat, hoop earrings, jewelry, looking at viewer, shirt, short hair, simple background, solo, upper body, yellow shirt"
	# Defaults shown in the UI text boxes; DEFAULT_PROMPT is also the fallback
	# used by generate_txt2img when it receives a non-string prompt.
	DEFAULT_PROMPT = "X go to Istanbul"
	DEFAULT_ROLE = "Superman"
	# Cover image pre-loaded as the style reference and sorted first in the gallery.
	DEFAULT_BOOK_COVER = "book_cover_dir/Blank.jpg"

	# Style-transfer model, loaded once at import time; it is the default
	# `hub_module` argument of perform_neural_transfer.
	hub_module = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')

	def tensor_to_image(tensor):
	    """Turn an image tensor into a PIL image.

	    The values are multiplied by 255 and cast to ``uint8``. When the result
	    has more than three dimensions, the leading axis must have length one
	    and is dropped before the PIL image is built.
	    """
	    pixels = np.array(tensor * 255, dtype=np.uint8)
	    if np.ndim(pixels) > 3:
	        # Only a batch holding exactly one image can become a single picture.
	        assert pixels.shape[0] == 1
	        pixels = pixels[0]
	    return PIL.Image.fromarray(pixels)


	def perform_neural_transfer(content_image_input, style_image_input, hub_module = hub_module):
	    """Apply the style of one image to another with the loaded hub model.

	    Args:
	        content_image_input: Array holding the content picture; it is scaled
	            by 1/255 and resized to 400x600 before being fed to the model.
	            Assumed to be laid out (height, width, channels) - TODO confirm.
	        style_image_input: Array holding the style picture; it is scaled by
	            1/255 and resized to 256x256.
	        hub_module: Callable taking (content, style) tensors; the first
	            element of its result is used as the stylized image.

	    Returns:
	        A PIL image resized back to the width and height of
	        ``content_image_input``.
	    """
	    content_image = content_image_input.astype(np.float32)[np.newaxis, ...] / 255.
	    content_image = tf.image.resize(content_image, (400, 600))

	    style_image = style_image_input.astype(np.float32)[np.newaxis, ...] / 255.
	    style_image = tf.image.resize(style_image, (256, 256))

	    outputs = hub_module(tf.constant(content_image), tf.constant(style_image))
	    stylized_image = tensor_to_image(outputs[0])

	    # Read the target size from the array shape. The 0-255 input must not go
	    # through tensor_to_image, which multiplies by 255 and expects values
	    # already scaled down. PIL sizes are (width, height); array shapes start
	    # with (height, width).
	    height, width = content_image_input.shape[:2]
	    return stylized_image.resize((width, height))

	# Model names offered in the "Current Model" dropdown. generate_txt2img maps
	# each enabled name to its inference endpoint; commented entries are disabled.
	list_models = [
	#"SDXL-1.0",
	"Pixel-Art-XL",
	"SD-1.5",
	"OpenJourney-V4",
	"Anything-V4",
	"Disney-Pixar-Cartoon",
	"Dalle-3-XL",
	#"Midjourney-V4-XL",
	]

	#list_prompts = get_samples()

	# Inference endpoint for every model name accepted by generate_txt2img.
	_MODEL_API_URLS = {
	    "SD-1.5": "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5",
	    "OpenJourney-V4": "https://api-inference.huggingface.co/models/prompthero/openjourney",
	    "Anything-V4": "https://api-inference.huggingface.co/models/xyn-ai/anything-v4.0",
	    "Disney-Pixar-Cartoon": "https://api-inference.huggingface.co/models/stablediffusionapi/disney-pixar-cartoon",
	    "Pixel-Art-XL": "https://api-inference.huggingface.co/models/nerijs/pixel-art-xl",
	    "Dalle-3-XL": "https://api-inference.huggingface.co/models/openskyml/dalle-3-xl",
	}

	# image_style -> (text appended to the prompt, extra negative-prompt text).
	# A negative entry of None means `is_negative` is forwarded unchanged.
	_STYLE_SUFFIXES = {
	    "None style": (", 8k", None),
	    "Cinematic": (
	        ", realistic, detailed, textured, skin, hair, eyes, by Alex Huguet, Mike Hill, Ian Spriggs, JaeCheol Park, Marek Denko",
	        "abstract, cartoon, stylized",
	    ),
	    "Digital Art": (
	        ", faded , vintage , nostalgic , by Jose Villa , Elizabeth Messina , Ryan Brenizer , Jonas Peterson , Jasmine Star",
	        "sharp , modern , bright",
	    ),
	    "Portrait": (
	        ", soft light, sharp, exposure blend, medium shot, bokeh, (hdr:1.4), high contrast, (cinematic, teal and orange:0.85), (muted colors, dim colors, soothing tones:1.3), low saturation, (hyperdetailed:1.2), (noir:0.4), (natural skin texture, hyperrealism, soft light, sharp:1.2)",
	        None,
	    ),
	}


	def generate_txt2img(current_model, prompt, is_negative=False, image_style="None style", steps=50, cfg_scale=7,
	                     seed=None, API_TOKEN = API_TOKEN):
	    """Request one image from the hosted text-to-image endpoint.

	    Args:
	        current_model: One of the keys of ``_MODEL_API_URLS``.
	        prompt: Text prompt; any non-string value is replaced by
	            ``DEFAULT_PROMPT``.
	        is_negative: Negative prompt. For the "Cinematic" and "Digital Art"
	            styles the style's own negative terms are appended to it; a
	            non-string value (such as the default ``False``) then counts as
	            an empty negative prompt.
	        image_style: One of the keys of ``_STYLE_SUFFIXES``.
	        steps: Forwarded in the request payload.
	        cfg_scale: Forwarded in the request payload.
	        seed: Forwarded in the request payload; a random value is drawn
	            when it is ``None``.
	        API_TOKEN: Bearer token; the Authorization header is omitted when it
	            is empty or ``None``.

	    Returns:
	        The PIL image decoded from the response body.

	    Raises:
	        ValueError: If ``current_model`` or ``image_style`` is unknown.
	        requests.HTTPError: If the endpoint answers with an error status.
	    """
	    if current_model not in _MODEL_API_URLS:
	        raise ValueError("Unknown model: {!r}".format(current_model))
	    if image_style not in _STYLE_SUFFIXES:
	        raise ValueError("Unknown image style: {!r}".format(image_style))
	    API_URL = _MODEL_API_URLS[current_model]

	    # Avoid sending the literal text "Bearer None" when no token is configured.
	    headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}

	    if not isinstance(prompt, str):
	        prompt = DEFAULT_PROMPT

	    prompt_suffix, negative_suffix = _STYLE_SUFFIXES[image_style]
	    if negative_suffix is None:
	        negative = is_negative
	    else:
	        # `is_negative` defaults to False, which cannot be concatenated with a
	        # string; treat any non-string value as "no user negative prompt".
	        user_negative = is_negative if isinstance(is_negative, str) else ""
	        negative = ", ".join(part for part in (user_negative, negative_suffix) if part)

	    payload = {
	        "inputs": prompt + prompt_suffix,
	        "is_negative": negative,
	        "steps": steps,
	        "cfg_scale": cfg_scale,
	        "seed": seed if seed is not None else random.randint(-1, 2147483647),
	    }

	    response = requests.post(API_URL, headers=headers, json=payload)
	    # Fail on the HTTP error itself rather than on an undecodable error body.
	    response.raise_for_status()
	    return Image.open(io.BytesIO(response.content))

	from huggingface_hub import InferenceClient
	import gradio as gr
	import pandas as pd
	import numpy as np
	import os

	# Few-shot conversation used to prime the language model. Each question is
	# "<prefix>：<event>", the exact format produce_4_event builds, so every row
	# must use the same full-width colon separator. The last row has an empty
	# answer and is dropped when the history is built.
	event_reasoning_df = pd.DataFrame(
	    [
	        ['Use the following events as a background to answer questions related to the cause and effect of time.', 'Ok'],

	        ['What are the necessary preconditions for the next event?：X had a big meal.', 'X placed an order'],
	        ['What could happen after the next event?：X had a big meal.', 'X becomes fat'],
	        ['What is the motivation for the next event?：X had a big meal.', 'X is hungry'],
	        ['What are your feelings after the following event?：X had a big meal.', "X tastes good"],

	        ['What are the necessary preconditions for the next event?：X met his favorite star.', 'X bought a ticket'],
	        ['What could happen after the next event?：X met his favorite star.', 'X is motivated'],
	        ['What is the motivation for the next event?：X met his favorite star.', 'X wants to have some entertainment'],
	        ['What are your feelings after the following event?：X met his favorite star.', "X is in a happy mood"],

	        ['What are the necessary preconditions for the next event?：X to cheat', 'X has evil intentions'],
	        ['What could happen after the next event?：X to cheat', 'X is accused'],
	        ['What is the motivation for the next event?：X to cheat', 'X wants to get something for nothing'],
	        ['What are your feelings after the following event?：X to cheat', "X is starving and freezing in prison"],

	        ['What could happen after the next event?：X go to Istanbul', ''],
	    ],
	    columns = ["User", "Assistant"]
	)

	# Inference client bound to the Mistral-7B-Instruct model id; it is the
	# default `client` argument of generate().
	Mistral_7B_client = InferenceClient(
	"mistralai/Mistral-7B-Instruct-v0.1"
	)

	# Question prefixes, one per reasoning relation. They also serve as the keys
	# of the dict returned by produce_4_event().
	NEED_PREFIX = 'What are the necessary preconditions for the next event?'
	EFFECT_PREFIX = 'What could happen after the next event?'
	INTENT_PREFIX = 'What is the motivation for the next event?'
	REACT_PREFIX = 'What are your feelings after the following event?'

	def format_prompt(message, history):
	    """Build the instruction-formatted prompt for a chat turn.

	    Every ``(user, assistant)`` pair of ``history`` becomes
	    ``[INST] user [/INST] assistant</s> ``; the new ``message`` is appended
	    as a final, unanswered ``[INST] ... [/INST]`` block after a leading
	    ``<s>`` marker.
	    """
	    past_turns = [
	        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
	        for user_prompt, bot_response in history
	    ]
	    return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"

	def generate(
	    prompt, history, client = Mistral_7B_client,
	    temperature=0.7, max_new_tokens=256, top_p=0.95, repetition_penalty=1.1,
	):
	    """Stream a completion for ``prompt`` given the earlier ``history``.

	    This is a generator: after every received token it yields the text
	    accumulated so far, so the last yielded value is the full answer.
	    The temperature is raised to 0.01 when a smaller value is passed, and
	    sampling uses the fixed seed 42.
	    """
	    sampling_options = {
	        "temperature": max(float(temperature), 1e-2),
	        "max_new_tokens": max_new_tokens,
	        "top_p": float(top_p),
	        "repetition_penalty": repetition_penalty,
	        "do_sample": True,
	        "seed": 42,
	    }

	    token_stream = client.text_generation(
	        format_prompt(prompt, history),
	        **sampling_options,
	        stream=True,
	        details=True,
	        return_full_text=False,
	    )

	    text_so_far = ""
	    for chunk in token_stream:
	        text_so_far += chunk.token.text
	        yield text_so_far
	    return text_so_far

	# Few-shot history as (user, assistant) pairs, one per row of
	# event_reasoning_df except the last row, which is left out.
	hist = event_reasoning_df.iloc[:-1, :].apply(
	lambda x: (x["User"], x["Assistant"]), axis = 1
	)

	def produce_4_event(event_fact, hist = hist):
	    """Ask the language model the four reasoning questions about an event.

	    The questions are asked in the order need, effect, intent, react, each
	    formatted as ``<prefix>：<event_fact>`` and answered with ``hist`` as the
	    conversation history.

	    Returns:
	        A dict mapping each question prefix to the final generated answer
	        with any ``</s>`` marker removed.
	    """
	    answers = {}
	    for prefix in (NEED_PREFIX, EFFECT_PREFIX, INTENT_PREFIX, REACT_PREFIX):
	        question = "{}：{}".format(prefix, event_fact)
	        # generate() yields growing partial texts; the last one is complete.
	        final_text = list(generate(question, history = hist, max_new_tokens = 2048))[-1]
	        answers[prefix] = final_text.replace("</s>", "")
	    return answers

	def transform_4_event_as_sd_prompts(event_fact ,event_reasoning_dict, role_name = "superman"):
	    """Turn an event and its four reasoning answers into image prompts.

	    Args:
	        event_fact: The event sentence, with ``X`` standing for the role.
	        event_reasoning_dict: Dict keyed by the four question prefixes, as
	            returned by ``produce_4_event``.
	        role_name: Text substituted for every ``X``. When it is not a
	            non-blank string, no substitution is made anywhere.

	    Returns:
	        ``(caption_list, req_list)``, both ordered intent, need, event,
	        react, effect. Captions are the plain sentences; ``req_list`` holds
	        the prompts generated from them by the language model.
	    """
	    has_role = isinstance(role_name, str) and bool(role_name.strip())

	    def with_role(text):
	        # One guard for every substitution, so a blank or non-string role
	        # can neither crash ``str.replace`` nor silently delete the "X".
	        return text.replace("X", role_name) if has_role else text

	    def to_sd_prompt(text):
	        question = "Transform this as a prompt in stable diffusion: {}".format(text)
	        # generate() yields growing partial texts; the last one is complete.
	        return list(generate(question, history = [], max_new_tokens = 2048))[-1].replace("</s>", "")

	    req = {k: to_sd_prompt(with_role(v)) for k, v in event_reasoning_dict.items()}
	    req["EVENT_FACT"] = to_sd_prompt(with_role(event_fact))

	    req_list = [
	        req[INTENT_PREFIX], req[NEED_PREFIX],
	        req["EVENT_FACT"],
	        req[REACT_PREFIX], req[EFFECT_PREFIX],
	    ]
	    caption_list = [
	        with_role(caption)
	        for caption in (
	            event_reasoning_dict[INTENT_PREFIX], event_reasoning_dict[NEED_PREFIX],
	            event_fact,
	            event_reasoning_dict[REACT_PREFIX], event_reasoning_dict[EFFECT_PREFIX],
	        )
	    ]
	    return caption_list, req_list

	def batch_as_list(input_, batch_size = 3):
	    """Split an iterable into consecutive lists of up to ``batch_size`` items.

	    A new list is started whenever the current one already holds
	    ``batch_size`` or more elements, so the last list may be shorter.
	    """
	    batches = []
	    current = None
	    for item in input_:
	        if current is not None and len(current) < batch_size:
	            current.append(item)
	        else:
	            current = [item]
	            batches.append(current)
	    return batches

	def add_margin(pil_img, top, right, bottom, left, color):
	    """Return a copy of ``pil_img`` surrounded by margins filled with ``color``.

	    The new image keeps the mode of ``pil_img``; the original picture is
	    pasted so that ``left`` and ``top`` pixels of margin precede it.
	    """
	    src_width, src_height = pil_img.size
	    canvas_size = (src_width + right + left, src_height + top + bottom)
	    canvas = Image.new(pil_img.mode, canvas_size, color)
	    canvas.paste(pil_img, (left, top))
	    return canvas

	def add_caption_on_image(input_image, caption, marg_ratio = 0.15, row_token_num = 6):
	    """Add a top and left margin to an image and write a caption on it.

	    The margin is ``marg_ratio`` times the longer image side and is filled
	    with the first colour reported by ``extcolors``. The caption is split on
	    spaces into rows of ``row_token_num`` words, drawn in black with a font
	    a quarter of the margin high. Each further row is shifted right and
	    down by the same step.

	    Returns:
	        The new, enlarged image with the caption drawn on it.
	    """
	    assert hasattr(input_image, "save")
	    marg_size = int(marg_ratio * max(input_image.size))
	    palette, _ = extcolors.extract_from_image(input_image)
	    framed = add_margin(input_image, marg_size, 0, 0, marg_size, palette[0][0])

	    font_size = int(marg_size / 4)
	    font = ImageFont.truetype("DejaVuSerif-Italic.ttf", font_size)
	    words = [word.strip() for word in caption.split(" ")]
	    rows = [" ".join(batch) for batch in batch_as_list(words, row_token_num)]

	    canvas = ImageDraw.Draw(framed)
	    for row_index, row_text in enumerate(rows):
	        # Both offsets are the same quantity; the grouping of the products
	        # is kept exactly so the float results do not change.
	        x_offset = font_size * (row_index + 1) * 1.1
	        y_offset = font_size * ((row_index + 1) * 1.1)
	        canvas.text((x_offset, y_offset), row_text, fill="black", font = font)

	    return framed


	def expand2square(pil_img, background_color):
	    """Pad an image to a square without scaling it.

	    Args:
	        pil_img: Image to pad; returned unchanged when already square.
	        background_color: Fill colour of the added area.

	    Returns:
	        A square image whose side is the longer side of ``pil_img``, with
	        the original picture centred on it.
	    """
	    width, height = pil_img.size
	    if width == height:
	        return pil_img
	    side = max(width, height)
	    result = Image.new(pil_img.mode, (side, side), background_color)
	    # ``paste`` needs an (x, y) pair. One of the two offsets is always 0 and
	    # the other centres the picture along the padded axis.
	    result.paste(pil_img, ((side - width) // 2, (side - height) // 2))
	    return result

	def generate_video(images, video_name = 'ppt.avi'):
	    """Write the given PIL images to a video file, one image per frame.

	    Args:
	        images: Iterable of objects with a ``save(path)`` method. The frame
	            size of the video is taken from the first image.
	        video_name: Path of the video file to create.

	    Raises:
	        ValueError: If ``images`` is empty.
	    """
	    import cv2
	    import tempfile

	    images = list(images)
	    if not images:
	        raise ValueError("generate_video needs at least one image")

	    # The frames go through PNG files because they are read back with
	    # cv2.imread. A temporary directory keeps them out of the working
	    # directory and removes them even when writing fails.
	    with tempfile.TemporaryDirectory() as frame_dir:
	        frame_paths = []
	        for index, im in enumerate(images):
	            frame_path = os.path.join(frame_dir, "frame_{:05d}.png".format(index))
	            im.save(frame_path)
	            frame_paths.append(frame_path)

	        # Frame size of the video is the size of the first image.
	        height, width, _ = cv2.imread(frame_paths[0]).shape

	        # Arguments: file name, fourcc code 0, one frame per second, frame size.
	        video = cv2.VideoWriter(video_name, 0, 1, (width, height))
	        try:
	            for frame_path in frame_paths:
	                video.write(cv2.imread(frame_path))
	        finally:
	            # Always finalise the file, even if a frame could not be written.
	            video.release()

	def make_video_from_image_list(image_list, video_name = "ppt.avi"):
	    """Render a list of PIL images as a video of equally sized square frames.

	    Any existing file at ``video_name`` is deleted first. Every image is
	    padded to a square with the first colour reported by ``extcolors`` and
	    resized to the largest width or height found in the list.

	    Returns:
	        ``video_name``.
	    """
	    if os.path.exists(video_name):
	        os.remove(video_name)
	    assert all(hasattr(image, "save") for image in image_list)

	    # zip(*sizes) groups all widths together and all heights together.
	    widths_and_heights = zip(*(image.size for image in image_list))
	    side = max([max(dimension) for dimension in widths_and_heights])

	    frames = []
	    for image in image_list:
	        fill_color = extcolors.extract_from_image(image)[0][0][0]
	        frames.append(expand2square(image, fill_color).resize((side, side)))

	    generate_video(frames, video_name = video_name)
	    return video_name

	'''
	style_transfer_client = Client("https://svjack-super-resolution-neural-style-transfer.hf.space")
	def style_transfer_func(content_img, style_img, style_transfer_client = style_transfer_client):
	from uuid import uuid1
	assert hasattr(content_img, "save")
	assert hasattr(style_img, "save")
	content_im_name = "{}.png".format(uuid1())
	style_im_name = "{}.png".format(uuid1())
	content_img.save(content_im_name)
	style_img.save(style_im_name)
	out = style_transfer_client.predict(
	content_im_name,
	style_im_name,
	"none",
	fn_index=1
	)
	os.remove(content_im_name)
	os.remove(style_im_name)
	return Image.open(out)
	'''
	def style_transfer_func(content_img, style_img):
	    """Restyle ``content_img`` after ``style_img`` and return a PIL image.

	    When the first colour reported for the style image is pure white and
	    covers more than 95% of the counted pixels, the style image is treated
	    as blank and ``content_img`` is returned unchanged.
	    """
	    assert hasattr(content_img, "save")
	    assert hasattr(style_img, "save")

	    palette, _ = extcolors.extract_from_image(style_img)
	    if palette:
	        first_entry = palette[0]
	        if first_entry[0] == (255, 255, 255):
	            counted_pixels = sum(entry[1] for entry in palette)
	            if first_entry[1] / counted_pixels > 0.95:
	                return content_img

	    stylized = perform_neural_transfer(np.asarray(content_img), np.asarray(style_img))
	    assert hasattr(stylized, "save")
	    return stylized


	def gen_images_from_event_fact(current_model, event_fact = DEFAULT_PROMPT, role_name = DEFAULT_ROLE,
	                               style_pic = None
	):
	    """Build the story video for one event.

	    The event is expanded into four reasoning sentences, every sentence is
	    turned into an image prompt and rendered, the images are optionally
	    restyled after ``style_pic``, captioned, and written to a video whose
	    first frame is the event itself.

	    Args:
	        current_model: Model name forwarded to ``generate_txt2img``.
	        event_fact: Event sentence, with ``X`` standing for the role.
	        role_name: Name substituted for ``X``.
	        style_pic: Optional array with the style reference picture.

	    Returns:
	        Path of the generated video file.
	    """
	    event_reasoning_dict = produce_4_event(event_fact)
	    caption_list ,event_reasoning_sd_list = transform_4_event_as_sd_prompts(event_fact ,
	        event_reasoning_dict,
	        role_name = role_name
	    )

	    # Position of the event itself in the two lists returned above.
	    event_position = 2

	    # Keep every image together with its caption and position so that
	    # dropping an unusable image can never shift the captions.
	    frames = []
	    for position, (prompt, caption) in enumerate(tqdm(list(zip(event_reasoning_sd_list, caption_list)))):
	        im = generate_txt2img(current_model, prompt, is_negative=False, image_style="None style")
	        if hasattr(im, "save"):
	            frames.append((position, im, caption))
	        sleep(2)

	    if style_pic is not None and hasattr(style_pic, "size"):
	        style_pic = Image.fromarray(style_pic.astype(np.uint8))
	        print("perform styling.....")
	        frames = [
	            (position, style_transfer_func(im, style_pic), caption)
	            for position, im, caption in tqdm(frames)
	        ]

	    # Stable sort: the event frame moves to the front and the others keep
	    # their order. Selecting by position avoids comparing images by content.
	    frames.sort(key=lambda frame: frame[0] != event_position)
	    img_list_reordered = [add_caption_on_image(im, caption) for _, im, caption in frames]

	    video_path = make_video_from_image_list(img_list_reordered)
	    return video_path

	def image_click(images, evt: gr.SelectData,
	):
	    """Return the ``"name"`` value of the gallery entry that was selected.

	    ``images`` is the gallery value and ``evt.index`` the position of the
	    selected entry; the first element of that entry is expected to be a
	    mapping with a ``"name"`` key.
	    """
	    selected_entry = images[evt.index]
	    image_info = selected_entry[0]
	    return image_info["name"]

	def get_book_covers():
	    """List the cover images found below ``book_cover_dir``.

	    Files ending in ``.jpg``, ``.png`` and ``.jpeg`` are collected
	    recursively, in that order; files whose name starts with ``_`` are
	    skipped.

	    Returns:
	        A list of ``(path, label)`` tuples, where the label is the file name
	        without its extension. Entries whose path contains
	        ``DEFAULT_BOOK_COVER`` come first.
	    """
	    cover_dir = pathlib.Path("book_cover_dir")
	    found_paths = []
	    for pattern in ("*.jpg", "*.png", "*.jpeg"):
	        found_paths.extend(str(path) for path in cover_dir.rglob(pattern))

	    covers = []
	    for path_text in found_paths:
	        if path_text.split("/")[-1].startswith("_"):
	            continue
	        label = "".join(path_text.split(".")[:-1]).split("/")[-1]
	        covers.append((path_text, label))

	    # The sort is stable, so only the default cover moves to the front.
	    covers.sort(key = lambda cover: int(DEFAULT_BOOK_COVER in cover[0]), reverse = True)
	    return covers

	# Gradio UI: model and cover selection, event and role inputs, and the
	# resulting story video.
	with gr.Blocks(css=".caption-label {display:none}") as demo:
	# NOTE(review): `favicon` is assigned but not referenced anywhere in the visible code.
	favicon = '<img src="" width="48px" style="display: inline">'
	gr.Markdown(
	f"""<h1><center> 🎥💬 Comet Atomic Story Teller</center></h1>
	"""
	)
	with gr.Row():
	with gr.Column(elem_id="prompt-container"):
	current_model = gr.Dropdown(label="Current Model", choices=list_models, value="Pixel-Art-XL")
	# Gallery filled with the covers returned by get_book_covers().
	style_reference_input_gallery = gr.Gallery(get_book_covers(),
	#width = 512,
	height = 512,
	label = "StoryBook Cover (click to use)")
	with gr.Column(elem_id="prompt-container"):
	#with gr.Row(elem_id="prompt-container"):
	# Style reference image; starts with the default cover and can be replaced by the user.
	style_reference_input_image = gr.Image(
	label = "StoryBook Cover (you can upload yourself or click from left gallery)",
	#width = 512,
	value = DEFAULT_BOOK_COVER,
	interactive = True,
	)
	'''
	super_resolution_type = gr.Radio(choices = ["SD(Standard Definition)" ,"HD(High Definition)"],
	value="SD(Standard Definition)", label="Story Video Quality",
	interactive = True)
	'''

	with gr.Row():
	text_prompt = gr.Textbox(label="Event Prompt", placeholder=DEFAULT_PROMPT,
	lines=1, elem_id="prompt-text-input", value = DEFAULT_PROMPT,
	info = "You should set the prompt in format 'X do something', X is the role in the right."
	)
	role_name = gr.Textbox(label="Role (X)", placeholder=DEFAULT_ROLE, lines=1,
	elem_id="prompt-text-input", value = DEFAULT_ROLE,
	info = "You should set the Role (X) with some famous man (like: Confucius Superman)"
	)
	text_button = gr.Button("Generate", variant='primary', elem_id="gen-button")

	with gr.Row():
	#image_output = gr.Image(type="pil", label="Output Image", elem_id="gallery")
	#image_output = gr.Gallery(label="Output Images", elem_id="gallery")
	video_output = gr.Video(label = "Story Video", elem_id="gallery")

	#text_button.click(generate_txt2img, inputs=[current_model, text_prompt, negative_prompt, image_style], outputs=image_output)
	# Selecting a gallery entry passes the gallery value to image_click and
	# puts its result into the style reference image.
	style_reference_input_gallery.select(
	image_click, style_reference_input_gallery, style_reference_input_image
	)

	# The button runs gen_images_from_event_fact with the four inputs and
	# shows the returned video path in the video component.
	text_button.click(gen_images_from_event_fact, inputs=[current_model, text_prompt, role_name, style_reference_input_image],
	outputs=video_output)

	#select_button.click(generate_txt2img, inputs=[current_model, select_prompt, negative_prompt, image_style], outputs=image_output)
	#demo.load(get_params, None, select_prompt)

	# Start the app with the API page disabled.
	demo.launch(show_api=False)