wgetdd committed on
Commit
46d8cd4
1 Parent(s): c324d8c

Added Style embeddings

Browse files
app.py CHANGED
@@ -1,34 +1,48 @@
1
  import gradio as gr
2
  from torchvision import transforms
3
  import torch
4
- from main_inference import generate_mixed_image, generate_image, progress_video
 
5
  import matplotlib.colors as mcolors
6
 
7
- def run_generate_mixed_image(prompt1,prompt2):
8
- image = generate_mixed_image(prompt1,prompt2)
 
9
  return image
10
 
11
- def run_generate_image(prompt1,noise_checkbox):
12
- image = generate_image(prompt1,noised_image=noise_checkbox)
13
  return image
14
 
15
- def run_generate_image_with_color_doninance(prompt1,color,color_loss_scale,noised_image_checkbox_1):
16
  # Convert the hexadecimal color code to RGB values
17
  rgba_color = mcolors.hex2color(color)
18
  # Multiply the RGB values by 255 to get them in the [0, 255] range
19
  rgb_values = [int(val * 255) for val in rgba_color]
20
- image = generate_image(prompt1,True,color,color_loss_scale,noised_image_checkbox_1)
21
  return image
22
-
23
- def run_process_video(prompt):
24
- # Ask for text input
25
- video = progress_video(prompt)
26
- return video
27
 
28
 
29
  description_text_to_image = """ ### Text to Image Generation
30
 
31
- 1. Write a Text Prompt.
32
 
33
  2. Output will be an image based on the text prompt provided.
34
 
@@ -38,7 +52,7 @@ description_text_to_image = """ ### Text to Image Generation
38
 
39
  description_generate_mixed_image = """ ### Mixed Image Generation
40
 
41
- 1. Write Two Text prompts.
42
 
43
  2. Output will be an image that is a mix of both prompts provided.
44
 
@@ -48,7 +62,7 @@ description_generate_mixed_image = """ ### Mix Image Generation
48
 
49
  description_generate_image_with_color_dominance = """ ### Generate Images with color dominance
50
 
51
- 1. Write a Text Prompt.
52
 
53
  2. Select a color
54
 
@@ -60,11 +74,13 @@ description_generate_image_with_color_dominance = """ ### Generate Images with c
60
 
61
  """
62
 
63
- description_progress_video = """ ### Get the full generation process video
64
 
65
- 1. Write a Text Prompt.
66
 
67
- 2. Output will be the video which contains frames of generated image, during various inference steps
 
 
68
 
69
  """
70
 
@@ -75,18 +91,26 @@ image_input1 = gr.Image(type='pil')
75
  image_input2 = gr.Image(type='filepath')
76
  image_input3 = gr.Image(type='pil')
77
  image_input4 = gr.Image(type='pil')
 
78
  text_input = gr.Text(label="Enter Text Prompt")
79
  text_input2 = gr.Text(label="Enter Text Prompt")
80
  text_input3 = gr.Text(label="Enter Text Prompt")
81
  text_input4 = gr.Text(label = "Enter Text Prompt")
82
  text_input5 = gr.Text(label = "Enter Text Prompt")
83
- video_output = gr.Video()
84
 
85
  color = gr.ColorPicker(label="Select a Color",description="Choose a color from the color picker:")
86
  noised_image_checkbox = gr.inputs.Checkbox(default=False, label="Show Noised Image")
87
  noised_image_checkbox_1 = gr.inputs.Checkbox(default=False, label="Show Noised Image")
88
  noised_image_checkbox_2 = gr.inputs.Checkbox(default=False, label="Show Noised Image")
 
89
  color_loss_scale = gr.inputs.Slider(minimum=0, maximum=255, default=40, step=1,label="Color Loss")
 
 
90
  css = "h1 { text-align: center } .about { text-align: justify; padding-left: 10%; padding-right: 10%; }"
91
 
92
  with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
@@ -100,6 +124,7 @@ with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
100
  with gr.Row(variant="panel"):
101
  with gr.Column(scale=1):
102
  text_input.render()
 
103
  noised_image_checkbox.render()
104
  with gr.Column(scale=1):
105
  image_input1.render()
@@ -118,7 +143,7 @@ with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
118
  # examples_per_page=4)
119
 
120
  run_generate_image_button.click(run_generate_image,
121
- inputs=[text_input,noised_image_checkbox],
122
  outputs=image_input1)
123
 
124
  with gr.Tab("Generate Image with Color Dominance"):
@@ -127,6 +152,7 @@ with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
127
  with gr.Column(scale=1):
128
  text_input4.render()
129
  color_loss_scale.render()
 
130
  noised_image_checkbox_1.render()
131
  color.render()
132
  with gr.Column(scale=1):
@@ -146,7 +172,7 @@ with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
146
  # examples_per_page=4)
147
 
148
  run_generate_image_with_color_doninance_button.click(run_generate_image_with_color_doninance,
149
- inputs=[text_input4,color,color_loss_scale,noised_image_checkbox_1],
150
  outputs=image_input3)
151
 
152
  ####################################################################################################################
@@ -156,6 +182,7 @@ with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
156
  with gr.Column(scale=1):
157
  text_input2.render()
158
  text_input3.render()
 
159
  noised_image_checkbox_2.render()
160
  with gr.Column(scale=1):
161
  image_input4.render()
@@ -175,18 +202,20 @@ with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
175
  # examples_per_page=4)
176
 
177
  run_generate_mixed_image_button.click(run_generate_mixed_image,
178
- inputs=[text_input2,text_input3,noised_image_checkbox_2],
179
  outputs=image_input4)
180
 
181
  ####################################################################################################################
182
- with gr.Tab("progress_video"):
183
  # Images
184
  with gr.Row(variant="panel"):
185
  with gr.Column(scale=1):
186
  text_input5.render()
 
 
187
 
188
  with gr.Column(scale=1):
189
- video_output.render()
190
 
191
  # Submit & Clear
192
  with gr.Row():
@@ -194,18 +223,18 @@ with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
194
  run_progress_video_button = gr.Button("progress_video", variant='primary')
195
  clear_btn_progress_video = gr.Button("Clear", variant="secondary")
196
 
197
- gr.Markdown(description_progress_video)
198
  # gr.Examples(examples = ["examples/12830823_87d2654e31.jpg", "examples/27782020_4dab210360.jpg", "examples/44129946_9eeb385d77.jpg"],
199
  # inputs=[text_input5],
200
- # outputs=video_output,
201
  # fn=run_process_video,
202
  # examples_per_page=4)
203
 
204
- run_progress_video_button.click(run_process_video,
205
  inputs=[
206
- text_input5,
207
  ],
208
- outputs=video_output)
209
 
210
  #######################################################################################################################
211
  #######################################################################################################################
 
1
  import gradio as gr
2
  from torchvision import transforms
3
  import torch
4
+ from main_inference import generate_mixed_image, generate_image
5
+ from style_guidence import generate_with_prompt_style
6
  import matplotlib.colors as mcolors
7
 
8
+
9
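+ # Maps the display names shown in the UI dropdown to the textual-inversion style embedding files under style_embeddings/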
+ style_file_maps = {
10
+ '3D Female Cyborgs':"style_embeddings/3d_female_cyborgs.bin",
11
+ '80s Anime':"style_embeddings/80s_anime.bin",
12
+ 'Anders Zorn':"style_embeddings/anders_zorn.bin",
13
+ "Angus Mcbride":"style_embeddings/angus_mcbride.bin",
14
+ "Breakcore":"style_embeddings/breakcore.bin",
15
+ "Brittney Williams":"style_embeddings/brittney_williams.bin",
16
+ "Bull vs Bear":"style_embeddings/bull_vs_bear.bin",
17
+ "Caitlin FairChild":"style_embeddings/caitlin_fairchild.bin",
18
+ "Exodus Styling":"style_embeddings/exodus_styling.bin",
19
+ "FoorByv2":"style_embeddings/foorbyv2.bin"
20
+ }
21
+
22
+ def run_generate_mixed_image(prompt1,prompt2,num_of_inf_steps,noised_image):
23
+ image = generate_mixed_image(prompt1,prompt2,num_of_inf_steps,noised_image)
24
  return image
25
 
26
+ def run_generate_image(prompt1,num_of_inf_steps,noise_checkbox):
27
+ image = generate_image(prompt1,num_inference_steps=num_of_inf_steps,noised_image=noise_checkbox)
28
  return image
29
 
30
+ def run_generate_image_with_color_doninance(prompt1,color,color_loss_scale,num_of_inf_steps,noised_image_checkbox_1):
31
  # Convert the hexadecimal color code to RGB values
32
  rgba_color = mcolors.hex2color(color)
33
  # Multiply the RGB values by 255 to get them in the [0, 255] range
34
  rgb_values = [int(val * 255) for val in rgba_color]
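  # e.g. "#ff8000" -> (1.0, 0.502, 0.0) -> [255, 128, 0]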
35
+ image = generate_image(prompt1,num_of_inf_steps,True,rgb_values,color_loss_scale,noised_image_checkbox_1)
36
  return image
37
+
38
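+ # Resolve the chosen style name to its embedding file and run style-guided generation (see style_guidence.py)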
+ def run_generate_image_with_style(prompt,style,num_of_inf_steps):
39
+ output = generate_with_prompt_style(prompt, style_file_maps[style],num_of_inf_steps)
40
+ return output
 
41
 
42
 
43
  description_text_to_image = """ ### Text to Image Generation
44
 
45
+ 1. Write a Text Prompt and choose the number of inference steps; more steps give better results but take longer to run.
46
 
47
  2. Output will be an image based on the text prompt provided.
48
 
 
52
 
53
  description_generate_mixed_image = """ ### Mixed Image Generation
54
 
55
+ 1. Write two Text Prompts and choose the number of inference steps; more steps give better results but take longer to run.
56
 
57
  2. Output will be an image that is a mix of both prompts provided.
58
 
 
62
 
63
  description_generate_image_with_color_dominance = """ ### Generate Images with color dominance
64
 
65
+ 1. Write a Text Prompt and choose the number of inference steps; more steps give better results but take longer to run.
66
 
67
  2. Select a color
68
 
 
74
 
75
  """
76
 
77
+ description_generate_prompt_with_style = """ ### Generate an image in the style of your choice
78
 
79
+ 1. Write a Text Prompt and choose the number of inference steps; more steps give better results but take longer to run.
80
 
81
+ 2. Select a style to apply to the image
82
+
83
+ 3. Get the Output
84
 
85
  """
86
 
 
91
  image_input2 = gr.Image(type='filepath')
92
  image_input3 = gr.Image(type='pil')
93
  image_input4 = gr.Image(type='pil')
94
+ image_input5 = gr.Image(type='pil')
95
  text_input = gr.Text(label="Enter Text Prompt")
96
  text_input2 = gr.Text(label="Enter Text Prompt")
97
  text_input3 = gr.Text(label="Enter Text Prompt")
98
  text_input4 = gr.Text(label = "Enter Text Prompt")
99
  text_input5 = gr.Text(label = "Enter Text Prompt")
100
+
101
+ num_of_inf_steps_slider1 = gr.inputs.Slider(minimum=0, maximum=50, default=30, step=1,label="Num of Inference Steps")
102
+ num_of_inf_steps_slider2 = gr.inputs.Slider(minimum=0, maximum=50, default=30, step=1,label="Num of Inference Steps")
103
+ num_of_inf_steps_slider3 = gr.inputs.Slider(minimum=0, maximum=50, default=30, step=1,label="Num of Inference Steps")
104
+ num_of_inf_steps_slider4 = gr.inputs.Slider(minimum=0, maximum=50, default=30, step=1,label="Num of Inference Steps")
105
 
106
  color = gr.ColorPicker(label="Select a Color",description="Choose a color from the color picker:")
107
  noised_image_checkbox = gr.inputs.Checkbox(default=False, label="Show Noised Image")
108
  noised_image_checkbox_1 = gr.inputs.Checkbox(default=False, label="Show Noised Image")
109
  noised_image_checkbox_2 = gr.inputs.Checkbox(default=False, label="Show Noised Image")
110
+ noised_image_checkbox_3 = gr.inputs.Checkbox(default=False, label="Show Noised Image")
111
  color_loss_scale = gr.inputs.Slider(minimum=0, maximum=255, default=40, step=1,label="Color Loss")
112
+ style_options = ['3D Female Cyborgs', '80s Anime','Anders Zorn',"Angus Mcbride","Breakcore", "Brittney Williams","Bull vs Bear","Caitlin FairChild","Exodus Styling","FoorByv2"]
113
+ selected_style = gr.Dropdown(style_options,label="Select a Style to Follow",default="Anders Zorn")
114
  css = "h1 { text-align: center } .about { text-align: justify; padding-left: 10%; padding-right: 10%; }"
115
 
116
  with gr.Blocks(css=css, title='Play with Stable Diffusion') as demo:
 
124
  with gr.Row(variant="panel"):
125
  with gr.Column(scale=1):
126
  text_input.render()
127
+ num_of_inf_steps_slider1.render()
128
  noised_image_checkbox.render()
129
  with gr.Column(scale=1):
130
  image_input1.render()
 
143
  # examples_per_page=4)
144
 
145
  run_generate_image_button.click(run_generate_image,
146
+ inputs=[text_input,num_of_inf_steps_slider1,noised_image_checkbox],
147
  outputs=image_input1)
148
 
149
  with gr.Tab("Generate Image with Color Dominance"):
 
152
  with gr.Column(scale=1):
153
  text_input4.render()
154
  color_loss_scale.render()
155
+ num_of_inf_steps_slider2.render()
156
  noised_image_checkbox_1.render()
157
  color.render()
158
  with gr.Column(scale=1):
 
172
  # examples_per_page=4)
173
 
174
  run_generate_image_with_color_doninance_button.click(run_generate_image_with_color_doninance,
175
+ inputs=[text_input4,color,color_loss_scale,num_of_inf_steps_slider2,noised_image_checkbox_1],
176
  outputs=image_input3)
177
 
178
  ####################################################################################################################
 
182
  with gr.Column(scale=1):
183
  text_input2.render()
184
  text_input3.render()
185
+ num_of_inf_steps_slider3.render()
186
  noised_image_checkbox_2.render()
187
  with gr.Column(scale=1):
188
  image_input4.render()
 
202
  # examples_per_page=4)
203
 
204
  run_generate_mixed_image_button.click(run_generate_mixed_image,
205
+ inputs=[text_input2,text_input3,num_of_inf_steps_slider3,noised_image_checkbox_2],
206
  outputs=image_input4)
207
 
208
  ####################################################################################################################
209
+ with gr.Tab("Generate Image with Style"):
210
  # Images
211
  with gr.Row(variant="panel"):
212
  with gr.Column(scale=1):
213
  text_input5.render()
214
+ num_of_inf_steps_slider4.render()
215
+ selected_style.render()
216
 
217
  with gr.Column(scale=1):
218
+ image_input5.render()
219
 
220
  # Submit & Clear
221
  with gr.Row():
 
223
  run_progress_video_button = gr.Button("progress_video", variant='primary')
224
  clear_btn_progress_video = gr.Button("Clear", variant="secondary")
225
 
226
+ gr.Markdown(description_generate_prompt_with_style)
227
  # gr.Examples(examples = ["examples/12830823_87d2654e31.jpg", "examples/27782020_4dab210360.jpg", "examples/44129946_9eeb385d77.jpg"],
228
  # inputs=[text_input5],
229
+ # outputs=image_input5,
230
  # fn=run_process_video,
231
  # examples_per_page=4)
232
 
233
+ run_progress_video_button.click(run_generate_image_with_style,
234
  inputs=[
235
+ text_input5,selected_style,num_of_inf_steps_slider4
236
  ],
237
+ outputs=image_input5)
238
 
239
  #######################################################################################################################
240
  #######################################################################################################################
main_inference.py CHANGED
@@ -10,8 +10,6 @@ from torchvision import transforms as tfms
10
  import shutil
11
  # For video display:
12
  import cv2
13
- from IPython.display import HTML
14
- from base64 import b64encode
15
  import os
16
  from utils import color_loss,latents_to_pil,pil_to_latent,sketch_loss
17
  # Set device
@@ -34,11 +32,11 @@ unet = unet.to(torch_device)
34
 
35
  scheduler.set_timesteps(15)
36
 
37
- def generate_mixed_image(prompt1, prompt2,noised_image=False):
38
  mix_factor = 0.4 #@param
39
  height = 512 # default height of Stable Diffusion
40
  width = 512 # default width of Stable Diffusion
41
- num_inference_steps = 50 #@param # Number of denoising steps
42
  guidance_scale = 8 # Scale for classifier-free guidance
43
  generator = torch.manual_seed(32) # Seed generator to create the inital latent noise
44
  batch_size = 1
@@ -100,12 +98,12 @@ def generate_mixed_image(prompt1, prompt2,noised_image=False):
100
 
101
  return output
102
 
103
- def generate_image(prompt,color_postprocessing=False,postporcessing_color=None,color_loss_scale=40,noised_image=False):
104
  #@title Store the predicted outputs and next frame for later viewing
105
  #prompt = 'A campfire (oil on canvas)' #
106
  height = 512 # default height of Stable Diffusion
107
  width = 512 # default width of Stable Diffusion
108
- num_inference_steps = 50 # # Number of denoising steps
109
  guidance_scale = 8 # # Scale for classifier-free guidance
110
  generator = torch.manual_seed(32) # Seed generator to create the inital latent noise
111
  batch_size = 1
@@ -210,26 +208,6 @@ def generate_image(prompt,color_postprocessing=False,postporcessing_color=None,c
210
 
211
  return output
212
 
213
- def progress_video(prompt):
214
- pil_image = generate_image(prompt)
215
- # Generate a list of image file paths (replace with your own logic)
216
- num_frames = len(os.listdir("steps/"))
217
- image_files = [f"steps/{i:04d}.jpeg" for i in range(1, num_frames + 1)]
218
- # Read the first image to get its size (assuming all images have the same size)
219
- first_image = cv2.imread({image_files[0]})
220
- height, width, _ = first_image.shape
221
-
222
- # Define the output video writer
223
- fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Codec for MP4
224
- out = cv2.VideoWriter('out.mp4', fourcc, 12, (width, height))
225
-
226
- for image_file in image_files:
227
- frame = cv2.imread(image_file)
228
- out.write(frame)
229
-
230
- out.release()
231
- return "out.mp4"
232
-
233
  def generate_noised_version_of_image(pil_image):
234
  # View a noised version
235
  encoded = pil_to_latent(pil_image,vae)
@@ -239,6 +217,7 @@ def generate_noised_version_of_image(pil_image):
239
  return latents_to_pil(encoded_and_noised,vae)[0] # Display
240
 
241
 
 
242
  # if __name__ == "__main__":
243
  # prompt = 'A campfire (oil on canvas)'
244
  # color_loss_scale = 40
@@ -248,5 +227,3 @@ def generate_noised_version_of_image(pil_image):
248
  # #pil_image = generate_noised_version_of_image(Image.open('output.png').resize((512, 512)))
249
  # pil_image.save("output1.png")
250
 
251
- if __name__ == "__main__":
252
- progress_video("lol")
 
10
  import shutil
11
  # For video display:
12
  import cv2
 
 
13
  import os
14
  from utils import color_loss,latents_to_pil,pil_to_latent,sketch_loss
15
  # Set device
 
32
 
33
  scheduler.set_timesteps(15)
34
 
35
+ def generate_mixed_image(prompt1, prompt2,num_inference_steps=50,noised_image=False):
36
  mix_factor = 0.4 #@param
37
  height = 512 # default height of Stable Diffusion
38
  width = 512 # default width of Stable Diffusion
39
+ num_inference_steps = num_inference_steps #@param # Number of denoising steps
40
  guidance_scale = 8 # Scale for classifier-free guidance
41
  generator = torch.manual_seed(32) # Seed generator to create the inital latent noise
42
  batch_size = 1
 
98
 
99
  return output
100
 
101
+ def generate_image(prompt,num_inference_steps=50,color_postprocessing=False,postporcessing_color=None,color_loss_scale=40,noised_image=False):
102
  #@title Store the predicted outputs and next frame for later viewing
103
  #prompt = 'A campfire (oil on canvas)' #
104
  height = 512 # default height of Stable Diffusion
105
  width = 512 # default width of Stable Diffusion
106
+ num_inference_steps = num_inference_steps # # Number of denoising steps
107
  guidance_scale = 8 # # Scale for classifier-free guidance
108
  generator = torch.manual_seed(32) # Seed generator to create the inital latent noise
109
  batch_size = 1
 
208
 
209
  return output
210
 
211
  def generate_noised_version_of_image(pil_image):
212
  # View a noised version
213
  encoded = pil_to_latent(pil_image,vae)
 
217
  return latents_to_pil(encoded_and_noised,vae)[0] # Display
218
 
219
 
220
+
221
  # if __name__ == "__main__":
222
  # prompt = 'A campfire (oil on canvas)'
223
  # color_loss_scale = 40
 
227
  # #pil_image = generate_noised_version_of_image(Image.open('output.png').resize((512, 512)))
228
  # pil_image.save("output1.png")
229
 
 
 
requirements.txt CHANGED
@@ -1,8 +1,12 @@
1
- transformers
2
- diffusers==0.2.4
3
- sentence_transformers
4
- gradio
5
- torch
6
- torchvision
7
- matplotlib
8
- opencv-python
1
+ diffusers==0.21.4
2
+ gradio==3.49.0
3
+ ipython==8.15.0
4
+ matplotlib==3.8.0
5
+ numpy==1.26.1
6
+ opencv_python==4.8.1.78
7
+ Pillow==10.1.0
9
+ torch==2.1.0
10
+ torchvision==0.16.0
11
+ tqdm==4.66.1
12
+ transformers==4.25.1
style_embeddings/3d_female_cyborgs.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/80s_anime.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/anders_zorn.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/angus_mcbride.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/breakcore.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/brittney_williams.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/buhu_art_style.bin ADDED
Binary file (4.86 kB). View file
 
style_embeddings/bull_vs_bear.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/caitlin_fairchild.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/exodus_styling.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/foorbyv2.bin ADDED
Binary file (3.82 kB). View file
 
style_embeddings/learned_embeds_arcane.bin ADDED
@@ -0,0 +1 @@
 
 
1
+ {"payload":{...},"title":"ERA-V1-Assignments/S20/learned_embeds_arcane.bin at main · gupta1912/ERA-V1-Assignments"}
style_embeddings/loaded_style_empire.bin ADDED
Binary file (10.6 kB). View file
 
style_embeddings/useembeddingname-neg.bin ADDED
Binary file (3.95 kB). View file
 
style_embeddings/useembeddingname.bin ADDED
Binary file (3.95 kB). View file
 
style_guidence.py ADDED
@@ -0,0 +1,176 @@
1
+ import torch
2
+ from transformers import CLIPTextModel, CLIPTokenizer
3
+ from diffusers import AutoencoderKL, UNet2DConditionModel, LMSDiscreteScheduler
4
+ from tqdm.auto import tqdm
5
+ from torch import autocast
6
+ from PIL import Image
7
+ from matplotlib import pyplot as plt
8
+ import numpy
9
+ from torchvision import transforms as tfms
10
+ import shutil
11
+ # For video display:
12
+ import cv2
13
+ from IPython.display import HTML
14
+ from base64 import b64encode
15
+ import os
16
+ from utils import color_loss,pil_to_latent,sketch_loss
17
+ # Set device
18
+ torch_device = "cpu"
19
+
20
+ vae = AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae")
21
+
22
+ # Load the tokenizer and text encoder to tokenize and encode the text.
23
+ tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
24
+ text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
25
+
26
+ # The UNet model for generating the latents.
27
+ unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet")
28
+
29
+ # The noise scheduler
30
+ scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000)
31
+ vae = vae.to(torch_device)
32
+ text_encoder = text_encoder.to(torch_device)
33
+ unet = unet.to(torch_device)
34
+
35
+ scheduler.set_timesteps(15)
36
+
37
+ token_emb_layer = text_encoder.text_model.embeddings.token_embedding
38
+ pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
39
+
40
+ position_ids = text_encoder.text_model.embeddings.position_ids[:, :77]
41
+ position_embeddings = pos_emb_layer(position_ids)
42
+
43
+
44
+
45
+ def get_output_embeds(input_embeddings):
46
+ # CLIP's text model uses causal mask, so we prepare it here:
47
+ bsz, seq_len = input_embeddings.shape[:2]
48
+ causal_attention_mask = text_encoder.text_model._build_causal_attention_mask(bsz, seq_len, dtype=input_embeddings.dtype)
49
+
50
+ # Getting the output embeddings involves calling the model with passing output_hidden_states=True
51
+ # so that it doesn't just return the pooled final predictions:
52
+ encoder_outputs = text_encoder.text_model.encoder(
53
+ inputs_embeds=input_embeddings,
54
+ attention_mask=None, # We aren't using an attention mask so that can be None
55
+ causal_attention_mask=causal_attention_mask.to(torch_device),
56
+ output_attentions=None,
57
+ output_hidden_states=True, # We want the output embs not the final output
58
+ return_dict=None,
59
+ )
60
+
61
+ # We're interested in the output hidden state only
62
+ output = encoder_outputs[0]
63
+
64
+ # There is a final layer norm we need to pass these through
65
+ output = text_encoder.text_model.final_layer_norm(output)
66
+
67
+ # And now they're ready!
68
+ return output
69
+
70
+ def set_timesteps(scheduler, num_inference_steps):
71
+ scheduler.set_timesteps(num_inference_steps)
72
+ scheduler.timesteps = scheduler.timesteps.to(torch.float32)
73
+
74
+ # def latents_to_pil(latents):
75
+ # # bath of latents -> list of images
76
+ # latents = (1 / 0.18215) * latents
77
+ # with torch.no_grad():
78
+ # image = vae.decode(latents).sample
79
+ # image = (image / 2 + 0.5).clamp(0, 1)
80
+ # image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
81
+ # images = (image * 255).round().astype("uint8")
82
+ # pil_images = [Image.fromarray(image) for image in images]
83
+ # return pil_images
84
+
85
+ def latents_to_pil(latents):
86
+ # batch of latents -> list of images
87
+ latents = (1 / 0.18215) * latents
88
+ with torch.no_grad():
89
+ image = vae.decode(latents).sample # .sample extracts the decoded tensor from the DecoderOutput returned by newer diffusers
90
+ image = (image / 2 + 0.5).clamp(0, 1)
91
+ image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
92
+ images = (image * 255).round().astype("uint8")
93
+ pil_images = [Image.fromarray(image) for image in images]
94
+ return pil_images
95
+
96
+
97
+ def generate_with_embs(text_embeddings,text_input, seed,num_inference_steps):
98
+
99
+ height = 512 # default height of Stable Diffusion
100
+ width = 512 # default width of Stable Diffusion
101
+ num_inference_steps = num_inference_steps # Number of denoising steps
102
+ guidance_scale = 7.5 # Scale for classifier-free guidance
103
+ generator = torch.manual_seed(seed) # Seed generator to create the inital latent noise
104
+ batch_size = 1
105
+
106
+ max_length = text_input.input_ids.shape[-1]
107
+ uncond_input = tokenizer(
108
+ [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
109
+ )
110
+ with torch.no_grad():
111
+ uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[0]
112
+ text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
113
+
114
+ # Prep Scheduler
115
+ set_timesteps(scheduler, num_inference_steps)
116
+
117
+ # Prep latents
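+ # Latents are 1/8th the image resolution in each spatial dim (the VAE downsamples by a factor of 8), with unet.in_channels channels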
118
+ latents = torch.randn(
119
+ (batch_size, unet.in_channels, height // 8, width // 8),
120
+ generator=generator,
121
+ )
122
+ latents = latents.to(torch_device)
123
+ # latents = latents * scheduler.init_noise_sigma
124
+ latents = latents * scheduler.sigmas[0] # Need to scale to match k
125
+
126
+ # Loop
127
+ for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps)):
128
+ # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
129
+ latent_model_input = torch.cat([latents] * 2)
130
+ sigma = scheduler.sigmas[i]
131
+ #latent_model_input = scheduler.scale_model_input(latent_model_input, t)
132
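+ # Equivalent to scheduler.scale_model_input for the k-LMS scheduler: divide the input by sqrt(sigma^2 + 1)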
+ latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
133
+ # predict the noise residual
134
+ with torch.no_grad():
135
+ noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
136
+
137
+ # perform guidance
138
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
139
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
140
+
141
+ # compute the previous noisy sample x_t -> x_t-1
142
+ #latents = scheduler.step(noise_pred, t, latents).prev_sample
143
+ latents = scheduler.step(noise_pred, i, latents)["prev_sample"]
144
+ return latents_to_pil(latents)[0]
145
+
146
+ def generate_with_prompt_style(prompt, style, num_of_inf_steps=50,seed = 42):
147
+
148
+ prompt = prompt + ' in style of s'
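+ # The trailing "s" acts as a placeholder token; its embedding is swapped below for the learned style embedding loaded from the .bin file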
149
+ embed = torch.load(style)
150
+ print("Keys",embed.keys())
151
+ text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
152
+ # for t in text_input['input_ids'][0][:20]: # We'll just look at the first 7 to save you from a wall of '<|endoftext|>'
153
+ # print(t, tokenizer.decoder.get(int(t)))
154
+ input_ids = text_input.input_ids.to(torch_device)
155
+
156
+ token_embeddings = token_emb_layer(input_ids)
157
+ # The new embedding - the learned style token
158
+ replacement_token_embedding = embed[list(embed.keys())[0]].to(torch_device)
159
+
160
+ # Insert this into the token embeddings
161
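+ # 338 is assumed to be the CLIP tokenizer id of the "s" placeholder appended above; only that position receives the style embedding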
+ token_embeddings[0, torch.where(input_ids[0]==338)] = replacement_token_embedding.to(torch_device)
162
+
163
+ # Combine with pos embs
164
+ input_embeddings = token_embeddings + position_embeddings
165
+
166
+ # Feed through to get final output embs
167
+ modified_output_embeddings = get_output_embeds(input_embeddings)
168
+
169
+ # And generate an image with this:
170
+ return generate_with_embs(modified_output_embeddings, text_input, seed,num_of_inf_steps)
171
+
172
+
173
+ # prompt = 'A man sipping wine wearing a spacesuit on the moon'
174
+ # image = generate_with_prompt_style(prompt, '/home/deepanshudashora/Documents/Stable_Diffusion/caitlin_fairchild.bin')
175
+
176
+ # image.save("output.png")