nigeljw committed
Commit 00972fe
1 Parent(s): b72451c

Added latent walk to lerp between multiple random input samples
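In short, the app now keeps two random latent samples and linearly interpolates between them. As a minimal standalone sketch of the idea (variable names here are illustrative, not the app's; shapes match the app's (1, 4, 64, 64) latents):

    import torch

    # Two independent Gaussian samples in Stable Diffusion's latent space:
    # batch 1, 4 channels, 64x64 spatial (for 512x512 output images).
    old_sample = torch.randn((1, 4, 64, 64))
    new_sample = torch.randn((1, 4, 64, 64))

    # torch.lerp(a, b, w) computes a + w * (b - a): w = 0.0 reproduces the
    # old sample, w = 1.0 the new one, and intermediate weights walk the
    # straight line between the two points in latent space.
    walk_weight = 0.25
    active = torch.lerp(old_sample, new_sample, walk_weight)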

Files changed (1)
app.py: +54 -25
app.py CHANGED
@@ -6,44 +6,57 @@ from torchvision import transforms
 from diffusers import StableDiffusionInpaintPipeline
 from diffusers import DPMSolverMultistepScheduler
 
+print("Initializing View Diffusion")
+
 deviceStr = "cuda" if torch.cuda.is_available() else "cpu"
 device = torch.device(deviceStr)
 latents = None
-
-def GenerateNewLatentsForInference():
-    global latents
-    if deviceStr == "cuda":
-        latents = torch.randn((1, 4, 64, 64), device=device, dtype=torch.float16)
-    else:
-        latents = torch.randn((1, 4, 64, 64), device=device)
-
-if deviceStr == "cuda":
-    pipeline = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting",
-                                                              revision="fp16",
-                                                              torch_dtype=torch.float16)
-                                                              #safety_checker=lambda images, **kwargs: (images, False))
-    pipeline.to(device)
-    pipeline.enable_xformers_memory_efficient_attention()
-else:
-    pipeline = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting")
-    #safety_checker=lambda images, **kwargs: (images, False))
-
+oldLatents = None
+latentsSize = (1, 4, 64, 64)
 imageSize = (512, 512)
 lastImage = Image.new(mode="RGB", size=imageSize)
-
 lastSeed = 4096
 generator = torch.Generator(device).manual_seed(lastSeed)
+modelNames = ["stabilityai/stable-diffusion-2-inpainting",
+              "runwayml/stable-diffusion-inpainting"]
+modelIndex = 0
+pipeline = None
+oldLatentWalk = None
+activeLatents = None
 
-GenerateNewLatentsForInference()
+def GenerateNewLatentsForInference():
+    global latents, oldLatents
+    oldLatents = latents
+    if deviceStr == "cuda":
+        latents = torch.randn(latentsSize, device=device, dtype=torch.float16)
+    else:
+        latents = torch.randn(latentsSize, device=device)
 
-def diffuse(staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps):
-    global latents, lastSeed, generator, deviceStr, lastImage
+def InitializeOutpainting():
+    print("Initializing Outpainting")
+    global pipeline
+    if deviceStr == "cuda":
+        pipeline = StableDiffusionInpaintPipeline.from_pretrained(modelNames[modelIndex],
+                                                                  torch_dtype=torch.float16)
+                                                                  #safety_checker=lambda images, **kwargs: (images, False))
+        pipeline.to(device)
+        pipeline.enable_xformers_memory_efficient_attention()
+    else:
+        pipeline = StableDiffusionInpaintPipeline.from_pretrained(modelNames[modelIndex])
+        #safety_checker=lambda images, **kwargs: (images, False))
+
+def diffuse(latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps):
+    global lastImage, lastSeed, generator, oldLatentWalk, activeLatents
 
     if mask is None or pauseInference is True:
         return lastImage
 
     if staticLatents is False:
         GenerateNewLatentsForInference()
+
+    if oldLatentWalk != latentWalk:
+        activeLatents = torch.lerp(oldLatents, latents, latentWalk)
+        oldLatentWalk = latentWalk
 
     if lastSeed != generatorSeed:
         generator = torch.Generator(device).manual_seed(generatorSeed)
@@ -55,13 +68,23 @@ def diffuse(staticLatents, generatorSeed, inputImage, mask, pauseInference, prom
                          mask_image=mask,
                          guidance_scale=guidanceScale,
                          num_inference_steps=numInferenceSteps,
-                         latents=latents,
+                         latents=activeLatents,
                          generator=generator).images[0]
 
     lastImage = newImage
 
     return newImage
 
+InitializeOutpainting()
+
+print("Generating Latents")
+
+GenerateNewLatentsForInference()
+GenerateNewLatentsForInference()
+activeLatents = oldLatents
+
+print("Initializing Gradio Interface")
+
 defaultMask = Image.open("assets/masks/diamond.png")
 numInfStepsDesc = "A higher value generally increases quality, but reduces the frames per second of the output stream."
 staticLatentsDesc = "This setting increases the frame-to-frame determinism of the generation. If this is disabled, then the inference will take continuous large walks across the latent space between frames."
@@ -69,6 +92,7 @@ generatorSeedDesc = "Identical seeds allow for persistent scene generation betwe
 promptDesc = "This text will condition the generation of the scene to help guide the content creation."
 negPromptDesc = "This text will help deter the generation from converging towards reconstructing the elements described in the text."
 outputText = "This inferred imagery expands the field of view from the masked area of the input camera feed."
+latentWalkDesc = "This allows you to walk short spans across the latent space with relatively continuous gradients."
 
 prompt = gradio.Textbox(label="Prompt", info=promptDesc, placeholder="A person in a room with colored hair", lines=3)
 negativePrompt = gradio.Textbox(label="Negative Prompt", info=negPromptDesc, placeholder="Facial hair", lines=3)
@@ -79,11 +103,16 @@ guidanceScale = gradio.Slider(label="Guidance Scale", info="A higher value cause
 numInferenceSteps = gradio.Slider(label="Number of Inference Steps", info=numInfStepsDesc, maximum=100, minimum=1, value=20)
 generatorSeed = gradio.Slider(label="Generator Seed", info=generatorSeedDesc, maximum=10000, value=lastSeed)
 staticLatents = gradio.Checkbox(label="Static Latents", info=staticLatentsDesc, value=True)
+latentWalk = gradio.Slider(label="Latent Walk", info=latentWalkDesc, maximum=1.0, minimum=0.0, value=0.0)
 pauseInference = gradio.Checkbox(label="Pause Inference", value=False)
+#modelIndex = gradio.Dropdown(modelNames, label="Model", value="runwayml/stable-diffusion-inpainting")
 
 description = "This generative machine learning demonstration streams stable diffusion outpainting inference live from your camera on your computer or phone to expand your local reality and create an alternate world. High quality frame-to-frame determinism is a hard problem to solve for latent diffusion models as the generation is inherently relative to the input noise distributions for the latents, and many factors, such as the inherent Bayer noise from the camera images as well as anything that is altered between camera images (focus, white balance, etc.), cause non-determinism between frames. Some methods apply spatiotemporal attention, but this demonstration focuses on control over the input latents to navigate the latent space. <b>Increase the lighting of your physical scene from your camera's perspective, and avoid self shadows of scene content, to improve the quality and consistency of the scene generation.</b>"
 article = "This demonstration should initialize automatically from the default values, and run relatively well, but if the output is not an ideal reconstruction of your physical local space from your camera's perspective, then you should adjust the generator seed to take large walks across the latent space. In addition, the static latents can be disabled to continuously walk the latent space, and then set to static again when a better region of the embedded space is found, but this will increase frame-to-frame non-determinism. You can also condition the generation using prompts to reinforce or change aspects of the scene. <b>If you see a black image instead of a generated output image, then you are running into the safety checker.</b> This can trigger inconsistently even when the generated content is purely PG. If this happens, then increase the lighting of the scene and also increase the number of inference steps to improve the generated prediction and reduce the likelihood of the safety checker triggering a false positive."
 
-inputs=[staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps]
+inputs=[latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps]
 ux = gradio.Interface(fn=diffuse, title="View Diffusion", article=article, description=description, inputs=inputs, outputs=outputImage, live=True)
+
+print("Launching Demo")
+
 ux.launch()
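One subtlety in the startup sequence above: GenerateNewLatentsForInference() now shifts the previous sample into oldLatents before drawing a fresh one, so the app calls it twice before launching; after a single call oldLatents would still be None and torch.lerp would fail. A minimal sketch of that hand-off (CPU path only, same shapes as the app):

    import torch

    latents = None
    oldLatents = None
    latentsSize = (1, 4, 64, 64)

    def GenerateNewLatentsForInference():
        global latents, oldLatents
        oldLatents = latents                  # shift the previous sample out
        latents = torch.randn(latentsSize)    # draw a fresh endpoint

    GenerateNewLatentsForInference()  # latents set; oldLatents still None
    GenerateNewLatentsForInference()  # both interpolation endpoints populated
    activeLatents = oldLatents        # start at the latentWalk = 0.0 end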
 
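To build intuition for the new Latent Walk slider without running the camera pipeline, one could sweep the weight offline. This is only an illustrative sketch (the sweep loop and print are hypothetical, not part of the app), but it shows how each slider value yields a latent tensor that the inpainting pipeline would consume through its latents argument:

    import torch

    old_sample = torch.randn((1, 4, 64, 64))
    new_sample = torch.randn((1, 4, 64, 64))

    # Sweeping the weight from 0.0 to 1.0 drifts smoothly between the two
    # endpoints; feeding each result to the pipeline would morph the
    # generated scene frame by frame.
    for step in range(5):
        walk = step / 4
        active = torch.lerp(old_sample, new_sample, walk)
        print(f"walk={walk:.2f} mean={active.mean().item():+.4f}")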