Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -16,7 +16,7 @@ import spaces
|
|
16 |
from profanityfilter import ProfanityFilter
|
17 |
|
18 |
import torch
|
19 |
-
from diffusers import DiffusionPipeline,
|
20 |
|
21 |
# Set device
|
22 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
@@ -58,13 +58,116 @@ def prepare_image_for_watermark(image):
|
|
58 |
background.save('custom_tabi.jpg')
|
59 |
|
60 |
return background
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
@spaces.GPU(enable_queue=True)
|
63 |
def text2img_inference(prompt):
|
64 |
gr.Info('Image generation request sent')
|
65 |
with torch.no_grad():
|
66 |
image = text2img_pipe(
|
67 |
-
prompt=
|
68 |
negative_prompt=negative_prompt,
|
69 |
width=1024,
|
70 |
height=1024,
|
@@ -72,19 +175,36 @@ def text2img_inference(prompt):
|
|
72 |
guidance_scale=7.5
|
73 |
).images[0]
|
74 |
|
75 |
-
return image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
|
82 |
gradio_app = gr.Interface(
|
83 |
fn=generate_image,
|
84 |
inputs=[gr.Text(label="User Keywords"), gr.Image(label="Input Image", type='pil'), gr.Text(label="Generated Prompt")],
|
85 |
outputs=[gr.Image(label="Image Generation"), gr.Text(label="Image Prompt")],
|
86 |
title="Custom Tabi",
|
87 |
-
description="Enter
|
88 |
)
|
89 |
|
90 |
gradio_app.launch(debug=True)
|
|
|
16 |
from profanityfilter import ProfanityFilter
|
17 |
|
18 |
import torch
|
19 |
+
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
|
20 |
|
21 |
# Set device
|
22 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
|
58 |
background.save('custom_tabi.jpg')
|
59 |
|
60 |
return background
|
61 |
+
|
62 |
+
# Initialize the profanity filter
|
63 |
+
pf = ProfanityFilter()
|
64 |
+
def filter_inappropriate(input_text):
    """Blank out profane words in ``input_text``.

    Args:
        input_text: Free-form keywords typed by the user.

    Returns:
        The text with every censored character replaced by a space and
        leading/trailing whitespace removed.
    """
    # ProfanityFilter selects its replacement character through
    # set_censor(); assigning to a ``censor_char`` attribute is not read by
    # censor(), which would leave the default '*' masks in the prompt.
    pf.set_censor(' ')
    return pf.censor(input_text).strip()
|
69 |
+
|
70 |
+
# find the closest color name to rgb value
|
71 |
+
def closest_color(rgb_color):
    """Return the CSS3 color name nearest to ``rgb_color``.

    Nearness is the squared Euclidean distance between the first three
    components of ``rgb_color`` and each named CSS3 color.

    Args:
        rgb_color: Indexable sequence whose items 0, 1 and 2 are the red,
            green and blue components.

    Returns:
        The name of the nearest CSS3 color. When several names are equally
        near, the one that appears last in the webcolors table is returned.
    """
    nearest_name = None
    nearest_distance = None
    for hex_code, candidate in webcolors.CSS3_HEX_TO_NAMES.items():
        red, green, blue = webcolors.hex_to_rgb(hex_code)
        distance = (
            (red - rgb_color[0]) ** 2
            + (green - rgb_color[1]) ** 2
            + (blue - rgb_color[2]) ** 2
        )
        # "<=" (not "<") so that a later color at the same distance replaces
        # an earlier one.
        if nearest_distance is None or distance <= nearest_distance:
            nearest_distance = distance
            nearest_name = candidate
    return nearest_name
|
80 |
+
|
81 |
+
def get_dominant_colors(img_filepath):
    """Return the CSS3 names of the two most dominant colors in an image.

    Args:
        img_filepath: Path to an image file, or an already loaded
            ``PIL.Image.Image`` (the Gradio image input of this app is
            declared with ``type='pil'``).

    Returns:
        A list with one CSS3 color name per k-means cluster center.
    """
    # Accept both a path and an in-memory image. Converting to RGB guarantees
    # exactly three channels, so the reshape below also works for RGBA,
    # palette and greyscale inputs.
    if isinstance(img_filepath, Image.Image):
        rgb_image = img_filepath.convert("RGB")
    else:
        with Image.open(img_filepath) as opened:
            rgb_image = opened.convert("RGB")

    # Flatten to one row per pixel; cv2.kmeans requires float32 samples.
    pixels = np.float32(np.array(rgb_image).reshape(-1, 3))

    # k-means clustering to create a palette of the n_colors dominant colors
    n_colors = 2
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, .1)
    flags = cv2.KMEANS_RANDOM_CENTERS
    _, _labels, palette = cv2.kmeans(pixels, n_colors, None, criteria, 10, flags)

    # Map each cluster center to its nearest named color.
    return [closest_color(color) for color in palette]
|
104 |
+
|
105 |
+
def get_image_caption(image):
    """Ask the hosted moondream1 Space to describe an image.

    Args:
        image: Path (or URL) of the image, or an in-memory image object that
            has a ``save(path)`` method such as ``PIL.Image.Image``.

    Returns:
        Whatever the Space's ``/answer_question`` endpoint returns for the
        fixed description question.
    """
    import os
    import tempfile

    question = "Describe the colors, patterns, aesthetic, artistic style, and objects in this photo"
    client = Client("https://vikhyatk-moondream1.hf.space/")

    # The remote endpoint takes a file path, but this app's Gradio input
    # delivers an in-memory image; write it to a temporary PNG first.
    temp_path = None
    try:
        if isinstance(image, (str, bytes, os.PathLike)) or not hasattr(image, "save"):
            image_arg = image
        else:
            handle = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
            handle.close()
            temp_path = handle.name
            image.save(temp_path)
            image_arg = temp_path

        result = client.predict(
            image_arg,  # filepath in 'image' Image component
            question,  # str in 'Question' Textbox component
            api_name="/answer_question"
        )
    finally:
        # Remove the temporary upload whether or not the request succeeded.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    print(result)
    return result
|
114 |
+
|
115 |
+
def get_image_keywords(image):
    """Summarize an image as a text caption plus its dominant color names.

    Args:
        image: The image, passed unchanged to ``get_image_caption`` and
            ``get_dominant_colors``.

    Returns:
        A ``(caption, colors)`` tuple where ``colors`` is the dominant color
        names joined with ", ".
    """
    # The img2text description is requested first, then the color palette.
    description = get_image_caption(image)
    palette_names = get_dominant_colors(image)
    return description, ", ".join(palette_names)
|
123 |
+
|
124 |
+
from transformers import pipeline
|
125 |
+
# Text-generation pipeline (Zephyr 7B beta, bfloat16, automatic device
# placement) that construct_prompt calls; created once when the module loads.
pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto")
|
126 |
+
|
127 |
+
@spaces.GPU(enable_queue=True)
def construct_prompt(image_caption, image_colors, user_input):
    """Ask the Zephyr text-generation pipeline for an image-generation prompt.

    Args:
        image_caption: Text description of the reference image.
        image_colors: Comma-separated dominant color names.
        user_input: User keywords (already filtered by the caller).

    Returns:
        The model's reply with the echoed system/user turns removed and
        leading newlines stripped.
    """
    # System prompt with two worked examples. The text is kept as it appears
    # in the file, because any change to it changes what the model is told.
    agent_maker_sys = """
You are a AI whose job is to help users create their own custom shoe image which will reflect the characteristics, aesthetics, or include objects from an image described by users.
In particular, you need to respond succintly and write a prompt for an image generation model. The response must include to the word "mm-tabi" which will trigger the style of shoe. The response should avoid any descriptions of man or woman and don't include any articles of clothing from Caption.
The response should always end with "still life in the style of retrofuturism, highly detailed, hd, 8k".
The response should only use one or two aspects from the Caption provided by the user that could easily be applied to a still life scene or characteristic of the shoe, like color or texture or an object.
For example, if a user says,
"Colors: gainsboro, sienna
/n Keywords: summer casual chic
/n Caption: The image features a man wearing a red and yellow sweater, which has a leopard print design. He is posing for the camera, and the sweater is placed on a mannequin, adding a touch of artistic style to the scene. The photo also includes a book, which is placed on a surface, possibly a table or a shelf. The book's presence adds an element of interest and contrast to the overall aesthetic of the image. The combination of the man in the sweater, the mannequin, and the book creates a visually appealing and artistic composition."
, provide immediately an image prompt that describes a still life photo of a shoe corresponding to the keywords, color, and objects or stylistic elements from the caption provided.
Immediately STOP after that. It should be in this format:
"surreal photo of mm-tabi boot with split toe, surrounded by summer casual chic, red and yellow knit with leopard print, still life in the style of retrofuturism, fantasy, pimped, gainsboro, darkgray, darkolivegreen, sienna, highly detailed, hd, 8k"

Here's another example, if a user says,
"Colors: darkslategray, linen
/n Keywords: baroque dystopia
/n Caption: The image features a young man wearing a jacket and a pair of black sunglasses. The jacket has a gray color with a patterned design, and the sunglasses are black and gray as well. The man is posing for the camera, and his facial expression is neutral. The overall aesthetic of the photo is casual and laid-back, with a focus on the sunglasses and the jacket as the main objects in the scene."
; then respond:
"surreal photo of mm-tabi boot with split toe, surrounded by a baroque dystopia, gray patterned design, casual and laid-back, still life in the style of retrofuturism, dreamy, unconventional, darkslategray, linen, silver, highly detailed, hd, 8k"
"""

    instruction = f"""
<|system|>
{agent_maker_sys}</s>
<|user|>
"""

    # Close the user turn and open the assistant turn. Without the trailing
    # <|assistant|> tag the model is not cued to answer, and the cleanup
    # regex below has no closing tag to match, so the echoed prompt would be
    # returned together with the reply.
    prompt = (
        f"{instruction.strip()}\n Colors: {image_colors} \n Keywords: {user_input} \n Caption: {image_caption}</s>"
        "\n<|assistant|>\n"
    )
    print(f"PROMPT: {prompt}")
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)

    # The generated text is cleaned of everything from <|system|> up to the
    # first <|assistant|> tag, i.e. the echoed prompt.
    pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
    cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)

    return cleaned_text.lstrip("\n")
|
164 |
|
165 |
@spaces.GPU(enable_queue=True)
|
166 |
def text2img_inference(prompt):
|
167 |
gr.Info('Image generation request sent')
|
168 |
with torch.no_grad():
|
169 |
image = text2img_pipe(
|
170 |
+
prompt=prompt,
|
171 |
negative_prompt=negative_prompt,
|
172 |
width=1024,
|
173 |
height=1024,
|
|
|
175 |
guidance_scale=7.5
|
176 |
).images[0]
|
177 |
|
178 |
+
return image
|
179 |
+
|
180 |
+
def generate_image(user_input, input_image, image_prompt):
    """Generate a watermark-ready image from keywords and a reference image.

    When ``image_prompt`` is empty, a prompt is built from the reference
    image's caption and colors plus the filtered user keywords; otherwise
    ``image_prompt`` is used as is.

    Args:
        user_input: Keywords typed by the user.
        input_image: Reference image from the Gradio image input.
        image_prompt: Optional ready-made prompt; empty to build one.

    Returns:
        A ``(image, prompt)`` tuple: the image returned by
        ``prepare_image_for_watermark`` and the prompt that produced it.
    """
    if not image_prompt:
        # generate keywords from image
        gr.Info('Starting to generate caption for input image')
        img_caption, img_colors = get_image_keywords(input_image)

        # filter user input
        gr.Info('Processing keywords for inappropriate language')
        # Filter the user's keywords. The previous code passed ``prompt``
        # here, which is only assigned further down and therefore raised
        # UnboundLocalError.
        user_input = filter_inappropriate(user_input)

        # construct prompt from image caption, image colors, and user input
        gr.Info('Constructing prompt')
        prompt = construct_prompt(img_caption, img_colors, user_input)
        print(f"FULL PROMPT: {prompt}")
    else:
        prompt = image_prompt

    # text2img generation with the chosen prompt
    image = text2img_inference(prompt)
    watermarkable_image = prepare_image_for_watermark(image)
    return watermarkable_image, prompt
|
201 |
|
202 |
# Gradio UI: the three inputs map positionally onto
# generate_image(user_input, input_image, image_prompt), and the two outputs
# onto its (image, prompt) return value.
gradio_app = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Text(label="User Keywords"),
        gr.Image(label="Input Image", type='pil'),
        gr.Text(label="Generated Prompt"),
    ],
    outputs=[
        gr.Image(label="Image Generation"),
        gr.Text(label="Image Prompt"),
    ],
    title="Custom Tabi",
    description="Enter keywords and upload image to generate a custom Tabi boot",
)

# Start the app with debug output enabled.
gradio_app.launch(debug=True)
|