nigeljw committed on
Commit
f19df7b
1 Parent(s): 738e1c7

Fixed streaming image sequence issue

Browse files
Files changed (1) hide show
  1. app.py +44 -24
app.py CHANGED
@@ -26,7 +26,11 @@ activeLatents = None
26
 
27
  def GenerateNewLatentsForInference():
28
  global latents, oldLatents
29
- oldLatents = latents
 
 
 
 
30
  if deviceStr == "cuda":
31
  latents = torch.randn(latentsSize, device=device, dtype=torch.float16)
32
  else:
@@ -47,7 +51,7 @@ def InitializeOutpainting():
47
 
48
  # Based on: https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/4
49
  # Further optimized to trade a divide operation for a multiply
50
- def slerp(start, end, alpha):
51
  start_norm = torch.norm(start, dim=1, keepdim=True)
52
  end_norm = torch.norm(end, dim=1, keepdim=True)
53
  omega = torch.acos((start*end/(start_norm*end_norm)).sum(1))
@@ -56,7 +60,7 @@ def slerp(start, end, alpha):
56
  second = torch.sin(alpha*omega)/sinOmega
57
  return first.unsqueeze(1)*start + second.unsqueeze(1)*end
58
 
59
- def diffuse(latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps):
60
  global lastImage, lastSeed, generator, oldLatentWalk, activeLatents
61
 
62
  if mask is None or pauseInference is True:
@@ -66,7 +70,7 @@ def diffuse(latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInf
66
  GenerateNewLatentsForInference()
67
 
68
  if oldLatentWalk != latentWalk:
69
- activeLatents = slerp(oldLatents, latents, latentWalk)
70
  oldLatentWalk = latentWalk
71
 
72
  if lastSeed != generatorSeed:
@@ -82,7 +86,8 @@ def diffuse(latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInf
82
  latents=activeLatents,
83
  generator=generator).images[0]
84
 
85
- lastImage = newImage
 
86
 
87
  return newImage
88
 
@@ -105,25 +110,40 @@ negPromptDesc = "This text will help deter the generation from converging toward
105
  outputText = "This inferred imagery expands the field of view from the masked area of the input camera feed."
106
  latentWalkDesc = "This allows you to walk short spans across the latent space with relatively continuous gradients."
107
 
108
- prompt = gradio.Textbox(label="Prompt", info=promptDesc, placeholder="A person in a room with colored hair", lines=3)
109
- negativePrompt = gradio.Textbox(label="Negative Prompt", info=negPromptDesc, placeholder="Facial hair", lines=3)
110
- inputImage = gradio.Image(label="Input Feed", source="webcam", shape=[512,512], streaming=True)
111
- mask = gradio.Image(label="Mask", type="pil", value=defaultMask)
112
- outputImage = gradio.Image(label="Extrapolated Field of View")
113
- guidanceScale = gradio.Slider(label="Guidance Scale", info="A higher value causes the generation to be more relative to the text prompt conditioning.", maximum=100, minimum=1, value=7.5)
114
- numInferenceSteps = gradio.Slider(label="Number of Inference Steps", info=numInfStepsDesc, maximum=100, minimum=1, value=20)
115
- generatorSeed = gradio.Slider(label="Generator Seed", info=generatorSeedDesc, maximum=10000, value=lastSeed)
116
- staticLatents = gradio.Checkbox(label="Static Latents", info=staticLatentsDesc, value=True)
117
- latentWalk = gradio.Slider(label="Latent Walk", info=latentWalkDesc, maximum=1.0, minimum=0.0, value=0.0)
118
- pauseInference = gradio.Checkbox(label="Pause Inference", value=False)
119
- #modelIndex = gradio.Dropdown(modelNames, label="Model", value="runwayml/stable-diffusion-inpainting")
120
-
121
- description = "This generative machine learning demonstration streams stable diffusion outpainting inference live from your camera on your computer or phone to expand your local reality and create an alternate world. High quality frame to frame determinism is a hard problem to solve for latent diffusion models as the generation is inherently relative to input noise distributions for the latents, and many factors such as the inherent Bayer noise from the camera images as well as anything that is altered between camera images (such as focus, white balance, etc) causes non-determinism between frames. Some methods apply spatiotemporal attention, but this demonstration focuses on the control over the input latents to navigate the latent space. <b>Increase the lighting of your physical scene from your camera's perspective, and avoid self shadows of scene content, to improve the quality and consistency of the scene generation.</b>"
122
- article = "This demonstration should initialize automatically from the default values, and run relatively well, but if the output is not an ideal reconstruction of your physical local space from your camera's perspective, then you should adjust the generator seed to take large walks across the latent space. In addition, the static latents can be disabled to continuously walk the latent space, and then it can be set to static again when a better region of the embedded space is found, but this will increase frame to frame non-determinism. You can also condition the generation using prompts to reinforce or change aspects of the scene. <b>If you see a black image instead of a generated output image, then you are running into the safety checker. </b>This can trigger inconsistently even when the generated content is purely PG. If this happens, then increase the lighting of the scene and also increase the number of inference steps to improve the generated prediction to reduce the likelihood of the safety checker triggering a false positive."
123
-
124
- inputs=[latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps]
125
- ux = gradio.Interface(fn=diffuse, title="View Diffusion", article=article, description=description, inputs=inputs, outputs=outputImage, live=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  print("Launching Demo")
128
-
129
  ux.launch()
 
26
 
27
  def GenerateNewLatentsForInference():
28
  global latents, oldLatents
29
+ if activeLatents is not None:
30
+ oldLatents = activeLatents
31
+ else:
32
+ oldLatents = latents
33
+
34
  if deviceStr == "cuda":
35
  latents = torch.randn(latentsSize, device=device, dtype=torch.float16)
36
  else:
 
51
 
52
  # Based on: https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/4
53
  # Further optimized to trade a divide operation for a multiply
54
+ def Slerp(start, end, alpha):
55
  start_norm = torch.norm(start, dim=1, keepdim=True)
56
  end_norm = torch.norm(end, dim=1, keepdim=True)
57
  omega = torch.acos((start*end/(start_norm*end_norm)).sum(1))
 
60
  second = torch.sin(alpha*omega)/sinOmega
61
  return first.unsqueeze(1)*start + second.unsqueeze(1)*end
62
 
63
+ def Diffuse(latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps):
64
  global lastImage, lastSeed, generator, oldLatentWalk, activeLatents
65
 
66
  if mask is None or pauseInference is True:
 
70
  GenerateNewLatentsForInference()
71
 
72
  if oldLatentWalk != latentWalk:
73
+ activeLatents = Slerp(oldLatents, latents, latentWalk)
74
  oldLatentWalk = latentWalk
75
 
76
  if lastSeed != generatorSeed:
 
86
  latents=activeLatents,
87
  generator=generator).images[0]
88
 
89
+ if not pauseInference:
90
+ lastImage = newImage
91
 
92
  return newImage
93
 
 
110
  outputText = "This inferred imagery expands the field of view from the masked area of the input camera feed."
111
  latentWalkDesc = "This allows you to walk short spans across the latent space with relatively continuous gradients."
112
 
113
+ with gradio.Blocks(live=True) as ux:
114
+ gradio.Markdown("This generative machine learning demonstration streams stable diffusion outpainting inference live from your camera on your computer or phone to expand your local reality and create an alternate world. High quality frame to frame determinism is a hard problem to solve for latent diffusion models as the generation is inherently relative to input noise distributions for the latents, and many factors such as the inherent Bayer noise from the camera images as well as anything that is altered between camera images (such as focus, white balance, etc) causes non-determinism between frames. Some methods apply spatiotemporal attention, but this demonstration focuses on the control over the input latents to navigate the latent space. **Increase the lighting of your physical scene from your camera's perspective, and avoid self shadows of scene content, to improve the quality and consistency of the scene generation.**")
115
+ with gradio.Row():
116
+ with gradio.Column():
117
+ staticLatents = gradio.Checkbox(label="Static Latents", info=staticLatentsDesc, value=True)
118
+ inputImage = gradio.Image(label="Input Feed", source="webcam", shape=[512,512], streaming=True)
119
+ mask = gradio.Image(label="Mask", type="pil", value=defaultMask)
120
+ prompt = gradio.Textbox(label="Prompt", info=promptDesc, placeholder="A person in a room with colored hair", lines=3)
121
+ negativePrompt = gradio.Textbox(label="Negative Prompt", info=negPromptDesc, placeholder="Facial hair", lines=3)
122
+ guidanceScale = gradio.Slider(label="Guidance Scale", info="A higher value causes the generation to be more relative to the text prompt conditioning.", maximum=100, minimum=1, value=7.5)
123
+ numInferenceSteps = gradio.Slider(label="Number of Inference Steps", info=numInfStepsDesc, maximum=100, minimum=1, value=20)
124
+ generatorSeed = gradio.Slider(label="Generator Seed", info=generatorSeedDesc, maximum=10000, value=lastSeed)
125
+ #modelIndex = gradio.Dropdown(modelNames, label="Model", value="runwayml/stable-diffusion-inpainting")
126
+ inputImage.style(full_width=True)
127
+
128
+ with gradio.Column():
129
+ #generateLatents = gradio.Button(value="Generate New Latents")
130
+ latentWalk = gradio.Slider(label="Latent Walk", info=latentWalkDesc, maximum=1.0, minimum=0.0, value=0.0)
131
+ outputImage = gradio.Image(label="Extrapolated Field of View")
132
+ pauseInference = gradio.Checkbox(label="Pause Inference", value=False)
133
+
134
+ inferenceInputs = [latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps]
135
+ #generateLatents.click(GenerateNewLatentsForInference)
136
+ inputImage.change(fn=Diffuse, inputs=inferenceInputs, outputs=outputImage, show_progress=False)
137
+
138
+ #inputExamples = gradio.Examples([["assets/masks/diamond.png"],
139
+ # ["assets/masks/sphere.png"],
140
+ # ["assets/masks/square.png"]],
141
+ # inputs=inferenceInputs,)
142
+
143
+ gradio.Markdown("This demonstration should initialize automatically from the default values, and run relatively well, but if the output is not an ideal reconstruction of your physical local space from your camera's perspective, then you should adjust the generator seed to take large walks across the latent space. In addition, the static latents can be disabled to continuously walk the latent space, and then it can be set to static again when a better region of the embedded space is found, but this will increase frame to frame non-determinism. You can also condition the generation using prompts to reinforce or change aspects of the scene. **If you see a black image instead of a generated output image, then you are running into the safety checker.** This can trigger inconsistently even when the generated content is purely PG. If this happens, then increase the lighting of the scene and also increase the number of inference steps to improve the generated prediction to reduce the likelihood of the safety checker triggering a false positive.")
144
+
145
+ #inputs=[latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps]
146
+ #ux = gradio.Interface(fn=diffuse, title="View Diffusion", article=article, description=description, inputs=inputs, outputs=outputImage, examples=inputExamples, live=True)
147
 
148
  print("Launching Demo")
 
149
  ux.launch()