sessex committed on
Commit
c016545
1 Parent(s): 0879273

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -8
app.py CHANGED
@@ -16,7 +16,7 @@ import spaces
16
  from profanityfilter import ProfanityFilter
17
 
18
  import torch
19
- from diffusers import DiffusionPipeline, AutoencoderKL, DPMSolverMultistepScheduler
20
 
21
  # Set device
22
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -58,13 +58,116 @@ def prepare_image_for_watermark(image):
58
  background.save('custom_tabi.jpg')
59
 
60
  return background
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  @spaces.GPU(enable_queue=True)
63
  def text2img_inference(prompt):
64
  gr.Info('Image generation request sent')
65
  with torch.no_grad():
66
  image = text2img_pipe(
67
- prompt='surreal photo of a mm-tabi boot with split toe, still life in the style of retrofuturism, fantasy, unconventional, highly detailed, hd, 8k',
68
  negative_prompt=negative_prompt,
69
  width=1024,
70
  height=1024,
@@ -72,19 +175,36 @@ def text2img_inference(prompt):
72
  guidance_scale=7.5
73
  ).images[0]
74
 
75
- return image, "this is linda's ai generated caption"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- def generate_image(keywords, input_image, image_prompt):
78
- image, prompt = text2img_inference(keywords)
79
- watermarkable_image = prepare_image_for_watermark(image)
80
- return watermarkable_image, prompt
81
 
82
  gradio_app = gr.Interface(
83
  fn=generate_image,
84
  inputs=[gr.Text(label="User Keywords"), gr.Image(label="Input Image", type='pil'), gr.Text(label="Generated Prompt")],
85
  outputs=[gr.Image(label="Image Generation"), gr.Text(label="Image Prompt")],
86
  title="Custom Tabi",
87
- description="Enter a prompt with the trigger word 'TOK' to indicate in the style of maison margiela tabi shoe. Ex. oil painting still life of TOK boot on the beach, burgers and fries"
88
  )
89
 
90
  gradio_app.launch(debug=True)
 
16
  from profanityfilter import ProfanityFilter
17
 
18
  import torch
19
+ from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
20
 
21
  # Set device
22
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
58
  background.save('custom_tabi.jpg')
59
 
60
  return background
61
+
62
# Initialize the profanity filter. Censored words are blanked out with spaces
# rather than the filter's default censor character, so they simply vanish
# from the keywords that are later fed to the prompt builder.
pf = ProfanityFilter()
# profanityfilter configures the replacement character through set_censor();
# assigning to ``pf.censor_char`` only creates an attribute the filter never reads.
pf.set_censor(' ')


def filter_inappropriate(input_text):
    """Return ``input_text`` with inappropriate words removed.

    Args:
        input_text: Free-form keywords typed by the user.

    Returns:
        The text with every censored word blanked out, surrounding whitespace
        trimmed and the runs of spaces left behind by blanked words collapsed
        to a single space.
    """
    filtered_text = pf.censor(input_text)
    # Each censored word becomes a run of spaces as long as the word itself;
    # split/join squeezes those runs so the result reads as normal keywords.
    return " ".join(filtered_text.split())
69
+
70
def closest_color(rgb_color):
    """Find the CSS3 color name closest to an RGB value.

    Closeness is the squared Euclidean distance between ``rgb_color`` and
    each entry of ``webcolors.CSS3_HEX_TO_NAMES``.

    Args:
        rgb_color: Indexable triple ``(r, g, b)``.

    Returns:
        The name of the nearest CSS3 color. When several colors are equally
        close, the one listed last in the webcolors table is returned.
    """
    nearest_name = None
    nearest_distance = None
    for hex_code, css_name in webcolors.CSS3_HEX_TO_NAMES.items():
        red, green, blue = webcolors.hex_to_rgb(hex_code)
        red_term = (red - rgb_color[0]) ** 2
        green_term = (green - rgb_color[1]) ** 2
        blue_term = (blue - rgb_color[2]) ** 2
        distance = red_term + green_term + blue_term
        # "<=" lets a later color with an equal distance replace the earlier one.
        if nearest_distance is None or distance <= nearest_distance:
            nearest_name = css_name
            nearest_distance = distance
    return nearest_name
80
+
81
def get_dominant_colors(img_filepath, n_colors=2):
    """Name the most dominant colors of an image.

    The pixels are clustered with OpenCV k-means and every cluster centre is
    mapped to its nearest CSS3 color name via ``closest_color``.

    Args:
        img_filepath: Path (or file object) of the image to analyse. An
            already-loaded ``PIL.Image.Image`` is accepted as well.
        n_colors: Number of clusters, i.e. how many color names to return.
            Defaults to 2.

    Returns:
        A list of ``n_colors`` CSS3 color names, one per cluster centre.
        Clusters start from random centres, so the names and their order may
        differ between runs on the same image.
    """
    # Always work on 3-channel RGB data: RGBA, grayscale or palette images
    # cannot be reshaped into (r, g, b) rows correctly.
    if isinstance(img_filepath, Image.Image):
        rgb_image = img_filepath.convert("RGB")
    else:
        # The context manager closes the underlying file once the pixel data
        # has been copied out by convert().
        with Image.open(img_filepath) as img_data:
            rgb_image = img_data.convert("RGB")

    # One row per pixel, as float32 because cv2.kmeans requires it.
    pixels = np.float32(np.array(rgb_image).reshape(-1, 3))

    # Stop after 200 iterations or once the centres move by less than 0.1.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, .1)
    flags = cv2.KMEANS_RANDOM_CENTERS

    # 10 attempts with different random initialisations.
    _, _, palette = cv2.kmeans(pixels, n_colors, None, criteria, 10, flags)

    return [closest_color(color) for color in palette]
104
+
105
def get_image_caption(image):
    """Ask the hosted moondream1 Space to describe an image.

    Args:
        image: Value for the Space's 'image' Image component (a file path).

    Returns:
        Whatever the Space's ``/answer_question`` endpoint returns for the
        fixed description question; the value is also printed to stdout.
    """
    # Text sent to the 'Question' Textbox component of the Space.
    question = "Describe the colors, patterns, aesthetic, artistic style, and objects in this photo"
    moondream = Client("https://vikhyatk-moondream1.hf.space/")
    answer = moondream.predict(image, question, api_name="/answer_question")
    print(answer)
    return answer
114
+
115
def get_image_keywords(image):
    """Collect the textual description and color names for an image.

    Args:
        image: The image, passed unchanged to ``get_image_caption`` and
            ``get_dominant_colors``.

    Returns:
        A ``(caption, colors)`` tuple where ``colors`` is the dominant color
        names joined with ``", "``.
    """
    # img2text description first, then the dominant color palette.
    description = get_image_caption(image)
    palette_names = get_dominant_colors(image)
    return description, ", ".join(palette_names)
123
+
124
+ from transformers import pipeline
125
+ pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto")
126
+
127
@spaces.GPU(enable_queue=True)
def construct_prompt(image_caption, image_colors, user_input):
    """Ask the text-generation model to write an image-generation prompt.

    A system message with instructions and two worked examples is combined
    with a user message holding the colors, keywords and caption, and the
    result is sent to the module-level ``pipe`` text-generation pipeline.

    Args:
        image_caption: Description of the input image.
        image_colors: Comma-separated dominant color names of the input image.
        user_input: Keywords supplied by the user.

    Returns:
        Only the text generated by the model, stripped of surrounding
        whitespace; the instructions sent to the model are not included.
    """
    agent_maker_sys = """
    You are a AI whose job is to help users create their own custom shoe image which will reflect the characteristics, aesthetics, or include objects from an image described by users.
    In particular, you need to respond succintly and write a prompt for an image generation model. The response must include to the word "mm-tabi" which will trigger the style of shoe. The response should avoid any descriptions of man or woman and don't include any articles of clothing from Caption.
    The response should always end with "still life in the style of retrofuturism, highly detailed, hd, 8k".
    The response should only use one or two aspects from the Caption provided by the user that could easily be applied to a still life scene or characteristic of the shoe, like color or texture or an object.
    For example, if a user says,
    "Colors: gainsboro, sienna
    /n Keywords: summer casual chic
    /n Caption: The image features a man wearing a red and yellow sweater, which has a leopard print design. He is posing for the camera, and the sweater is placed on a mannequin, adding a touch of artistic style to the scene. The photo also includes a book, which is placed on a surface, possibly a table or a shelf. The book's presence adds an element of interest and contrast to the overall aesthetic of the image. The combination of the man in the sweater, the mannequin, and the book creates a visually appealing and artistic composition."
    , provide immediately an image prompt that describes a still life photo of a shoe corresponding to the keywords, color, and objects or stylistic elements from the caption provided.
    Immediately STOP after that. It should be in this format:
    "surreal photo of mm-tabi boot with split toe, surrounded by summer casual chic, red and yellow knit with leopard print, still life in the style of retrofuturism, fantasy, pimped, gainsboro, darkgray, darkolivegreen, sienna, highly detailed, hd, 8k"

    Here's another example, if a user says,
    "Colors: darkslategray, linen
    /n Keywords: baroque dystopia
    /n Caption: The image features a young man wearing a jacket and a pair of black sunglasses. The jacket has a gray color with a patterned design, and the sunglasses are black and gray as well. The man is posing for the camera, and his facial expression is neutral. The overall aesthetic of the photo is casual and laid-back, with a focus on the sunglasses and the jacket as the main objects in the scene."
    ; then respond:
    "surreal photo of mm-tabi boot with split toe, surrounded by a baroque dystopia, gray patterned design, casual and laid-back, still life in the style of retrofuturism, dreamy, unconventional, darkslategray, linen, silver, highly detailed, hd, 8k"
    """

    instruction = f"""
    <|system|>
    {agent_maker_sys}</s>
    <|user|>
    """

    user_turn = f"{instruction.strip()}\n Colors: {image_colors} \n Keywords: {user_input} \n Caption: {image_caption}</s>"
    # Open the assistant turn explicitly. Without this tag the model is not
    # cued to answer, and the clean-up below has no marker separating the
    # instructions from the reply.
    prompt = f"{user_turn}\n<|assistant|>\n"
    print(f"PROMPT: {prompt}")
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)

    generated_text = outputs[0]["generated_text"]
    if generated_text.startswith(prompt):
        # The pipeline echoes the prompt in front of the completion; drop it.
        cleaned_text = generated_text[len(prompt):]
    else:
        # Fallback: cut everything from the system tag up to the assistant tag.
        pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
        cleaned_text = re.sub(pattern, '', generated_text, flags=re.DOTALL)

    return cleaned_text.strip()
164
 
165
  @spaces.GPU(enable_queue=True)
166
  def text2img_inference(prompt):
167
  gr.Info('Image generation request sent')
168
  with torch.no_grad():
169
  image = text2img_pipe(
170
+ prompt=prompt,
171
  negative_prompt=negative_prompt,
172
  width=1024,
173
  height=1024,
 
175
  guidance_scale=7.5
176
  ).images[0]
177
 
178
+ return image
179
+
180
def generate_image(user_input, input_image, image_prompt):
    """Generate a custom Tabi image from keywords and a reference image.

    When ``image_prompt`` is supplied it is used verbatim. Otherwise a prompt
    is built from the caption and dominant colors of ``input_image`` plus the
    profanity-filtered ``user_input``.

    Args:
        user_input: Keywords typed by the user.
        input_image: Reference image used for captioning and color
            extraction; only read when no ``image_prompt`` is given.
        image_prompt: Ready-made prompt. Empty (or ``None``) means "build one".

    Returns:
        A ``(image, prompt)`` tuple: the generated image after
        ``prepare_image_for_watermark`` and the prompt that produced it.
    """
    if image_prompt:
        prompt = image_prompt
    else:
        # generate keywords from image
        gr.Info('Starting to generate caption for input image')
        img_caption, img_colors = get_image_keywords(input_image)

        # filter user input (the keywords, not the not-yet-built prompt)
        gr.Info('Processing keywords for inappropriate language')
        user_input = filter_inappropriate(user_input)

        # construct prompt from image caption, image colors, and user input
        gr.Info('Consrtucting prompt')
        prompt = construct_prompt(img_caption, img_colors, user_input)
        print(f"FULL PROMPT: {prompt}")

    # text2img generation with full prompt construction
    image = text2img_inference(prompt)
    watermarkable_image = prepare_image_for_watermark(image)
    return watermarkable_image, prompt
201
 
202
# Web UI: keywords + reference image (+ optional ready-made prompt) in,
# generated image + the prompt that produced it out.
gradio_app = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Text(label="User Keywords"),
        # The uploaded image is handed to the captioning Space client and to
        # Image.open(), both of which take a file path, so ask Gradio for the
        # path rather than a PIL object.
        gr.Image(label="Input Image", type='filepath'),
        gr.Text(label="Generated Prompt"),
    ],
    outputs=[gr.Image(label="Image Generation"), gr.Text(label="Image Prompt")],
    title="Custom Tabi",
    description="Enter keywords and upload image to generate a custom Tabi boot"
)

gradio_app.launch(debug=True)