ford442 committed (verified)
Commit 85f9298 · 1 Parent(s): 50da42d

Update app.py

Files changed (1): app.py (+45, -28)
app.py CHANGED

@@ -14,10 +14,6 @@ os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1')
 import torch
 # --- NEW ---
 # Import the OpenCV library
-import cv2
-import gc
-
-from image_gen_aux import UpscaleWithModel

 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
@@ -29,6 +25,11 @@ torch.backends.cuda.preferred_blas_library="cublas"
 torch.backends.cuda.preferred_linalg_library="cusolver"
 torch.set_float32_matmul_precision("highest")

+import cv2
+import gc
+
+from image_gen_aux import UpscaleWithModel
+
 import gradio as gr
 import numpy as np
 import random
@@ -40,14 +41,30 @@ from PIL import Image
 from huggingface_hub import hf_hub_download
 import shutil

+from diffusers import StableDiffusionXLImg2ImgPipeline, AutoencoderKL
+
+from inference import (
+    create_ltx_video_pipeline,
+    create_latent_upsampler,
+    load_image_to_tensor_with_resize_and_crop,
+    seed_everething,
+    get_device,
+    calculate_padding,
+    load_media_file
+)
+
+from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
+from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
+
 MAX_SEED = np.iinfo(np.int32).max

+generator = torch.Generator(device='cuda')
+
 upscaler = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))

-#import diffusers
-from diffusers import StableDiffusionXLImg2ImgPipeline, AutoencoderKL
 print("Loading SDXL Image-to-Image pipeline...")
 #vaeX = AutoencoderKL.from_pretrained('stabilityai/stable-diffusion-xl-refiner-1.0',subfolder='vae')
+
 enhancer_pipeline = StableDiffusionXLImg2ImgPipeline.from_pretrained(
     #"stabilityai/stable-diffusion-xl-base-1.0",
     "ford442/stable-diffusion-xl-refiner-1.0-bf16",
@@ -57,24 +74,13 @@ enhancer_pipeline = StableDiffusionXLImg2ImgPipeline.from_pretrained(
     requires_aesthetics_score=True,
     #vae=None
 )
+
 #enhancer_pipeline.vae=vaeX
 enhancer_pipeline.vae.set_default_attn_processor()

 enhancer_pipeline.to("cpu")
 print("SDXL Image-to-Image pipeline loaded successfully.")

-from inference import (
-    create_ltx_video_pipeline,
-    create_latent_upsampler,
-    load_image_to_tensor_with_resize_and_crop,
-    seed_everething,
-    get_device,
-    calculate_padding,
-    load_media_file
-)
-from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-
 config_file_path = "configs/ltxv-13b-0.9.8-distilled.yaml"
 with open(config_file_path, "r") as file:
     PIPELINE_CONFIG_YAML = yaml.safe_load(file)
@@ -83,7 +89,6 @@ LTX_REPO = "Lightricks/LTX-Video"
 MAX_IMAGE_SIZE = PIPELINE_CONFIG_YAML.get("max_resolution", 1280)
 MAX_NUM_FRAMES = 900

-# ... (model loading code is unchanged) ...
 pipeline_instance = None
 latent_upsampler_instance = None
 models_dir = "downloaded_models_gradio_cpu_init"
@@ -120,21 +125,24 @@ def get_duration(*args, **kwargs):
     if duration_ui > 5.0: return 100
     return 90

-
+@spaces.GPU(duration=30)
+def superres_image(image_to_enhance: Image.Image):
+    print("Doing super-resolution.")
+    with torch.no_grad():
+        upscale = upscaler(image_to_enhance, tiling=True, tile_width=1024, tile_height=1024)
+    enhanced_image = upscale.resize((upscale.width // 2, upscale.height // 2), Image.LANCZOS)
+    return enhanced_image
+
 @spaces.GPU(duration=45)
 def enhance_frame(prompt, image_to_enhance: Image.Image):
     try:
         print("Moving enhancer pipeline to GPU...")
         seed = random.randint(0, MAX_SEED)
-        generator = torch.Generator(device='cpu').manual_seed(seed)
+        generator.manual_seed(seed)
         enhancer_pipeline.to("cuda",torch.bfloat16)
         refine_prompt = prompt +" high detail, sharp focus, 8k, professional"
-        enhanced_image = enhancer_pipeline(prompt=refine_prompt, image=image_to_enhance, strength=0.1, generator=generator, num_inference_steps=220).images[0]
+        enhanced_image = enhancer_pipeline(prompt=refine_prompt, image=image_to_enhance, strength=0.1, generator=generator, num_inference_steps=160).images[0]
         print("Frame enhancement successful.")
-        print("Doing super-resolution.")
-        with torch.no_grad():
-            upscale = upscaler(enhanced_image, tiling=True, tile_width=1024, tile_height=1024)
-        enhanced_image = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
     except Exception as e:
         print(f"Error during frame enhancement: {e}")
         gr.Warning("Frame enhancement failed. Using original frame.")
@@ -147,7 +155,7 @@ def enhance_frame(prompt, image_to_enhance: Image.Image):
     return enhanced_image


-def use_last_frame_as_input(prompt, video_filepath, do_enhance):
+def use_last_frame_as_input(prompt, video_filepath, do_enhance, do_superres):
     if not video_filepath or not os.path.exists(video_filepath):
         gr.Warning("No video clip available.")
         return None, gr.update()
@@ -162,12 +170,18 @@ def use_last_frame_as_input(prompt, video_filepath, do_enhance):
         # 1. Immediately yield the original frame to the UI
         print("Displaying original last frame...")
         yield pil_image, gr.update()
+        if do_superres:
+            pil_image = superres_image(pil_image)
         if do_enhance:
             enhanced_image = enhance_frame(prompt, pil_image)
+            if do_superres:
+                enhanced_image = superres_image(enhanced_image)
             # 2. Yield the enhanced frame and switch the tab
             print("Displaying enhanced frame and switching tab...")
             yield enhanced_image, gr.update(selected="i2v_tab")
         else:
+            if do_superres:
+                pil_image = superres_image(pil_image)
             # If not enhancing, just switch the tab
             yield pil_image, gr.update(selected="i2v_tab")
     except Exception as e:
@@ -210,6 +224,8 @@ def generate(prompt, negative_prompt, clips_list, input_image_filepath, input_vi
         raise gr.Error("input_video_filepath is required for video-to-video mode")
     if randomize_seed: seed_ui = random.randint(0, 2**32 - 1)
     seed_everething(int(seed_ui))
+    seed = random.randint(0, MAX_SEED)
+    generator.manual_seed(seed)
     actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(round((max(1, round(duration_ui * fps)) - 1.0) / 8.0) * 8 + 1)))
     actual_height, actual_width = int(height_ui), int(width_ui)
     height_padded, width_padded = ((actual_height - 1) // 32 + 1) * 32, ((actual_width - 1) // 32 + 1) * 32
@@ -279,6 +295,7 @@ with gr.Blocks(css=css) as demo:
             duration_input = gr.Slider(label="Clip Duration (seconds)", minimum=1.0, maximum=10.0, value=2.0, step=0.1)
             improve_texture = gr.Checkbox(label="Improve Texture (multi-scale)", value=True)
             enhance_checkbox = gr.Checkbox(label="Improve Frame (SDXL Refiner)", value=True)
+            superres_checkbox = gr.Checkbox(label="Upscale Frame (ClearRealityV1)", value=True)
         with gr.Column():
             output_video = gr.Video(label="Last Generated Clip", interactive=False)
             use_last_frame_button = gr.Button("Use Last Frame as Input Image", visible=False)
@@ -320,7 +337,7 @@ with gr.Blocks(css=css) as demo:
     t2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=t2v_inputs, outputs=gen_outputs, api_name="text_to_video")
     i2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=i2v_inputs, outputs=gen_outputs, api_name="image_to_video")
     v2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=v2v_inputs, outputs=gen_outputs, api_name="video_to_video")
-    use_last_frame_button.click(fn=use_last_frame_as_input, inputs=[i2v_prompt,output_video,enhance_checkbox], outputs=[image_i2v, tabs])
+    use_last_frame_button.click(fn=use_last_frame_as_input, inputs=[i2v_prompt,output_video,enhance_checkbox, superres_checkbox], outputs=[image_i2v, tabs])
     stitch_button.click(fn=stitch_videos, inputs=[clips_state], outputs=[final_video_output])
     clear_button.click(fn=clear_clips, outputs=[clips_state, clip_counter_display, output_video, final_video_output])
 if __name__ == "__main__":
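For reference, the super-resolution step added in this commit can be exercised on its own. The sketch below is a minimal reconstruction, not code from the repository: it assumes the same image_gen_aux UpscaleWithModel API and Kim2091/ClearRealityV1 checkpoint used in app.py, and the input path frame.png is hypothetical.

import torch
from PIL import Image
from image_gen_aux import UpscaleWithModel

# Same checkpoint app.py loads; assumes a CUDA device is available.
upscaler = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))

def superres_image(image: Image.Image) -> Image.Image:
    # Run the upscaler tile-by-tile to keep VRAM use bounded on large frames.
    with torch.no_grad():
        upscaled = upscaler(image, tiling=True, tile_width=1024, tile_height=1024)
    # Downscale the upscaled result by 2 with Lanczos, as the new helper does,
    # keeping part of the enlargement in the final frame.
    return upscaled.resize((upscaled.width // 2, upscaled.height // 2), Image.LANCZOS)

if __name__ == "__main__":
    frame = Image.open("frame.png").convert("RGB")  # hypothetical input frame
    superres_image(frame).save("frame_upscaled.png")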
 
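The use_last_frame_as_input change relies on Gradio's generator-callback pattern: the handler yields once to show the raw frame immediately, then yields again with gr.update(selected=...) to swap in the processed frame and switch tabs. The sketch below illustrates that pattern in isolation; it is not code from this repository, and the component names and the sleep stand-in for the enhancement step are illustrative.

import time
import gradio as gr
from PIL import Image

def preview_then_switch(path):
    img = Image.open(path).convert("RGB")
    # First yield: show the raw frame right away, leave the tab selection alone.
    yield img, gr.update()
    time.sleep(2)  # stand-in for superres_image / enhance_frame
    # Second yield: show the processed frame and jump to the second tab.
    yield img, gr.update(selected="second_tab")

with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        with gr.Tab("Source", id="first_tab"):
            inp = gr.Image(type="filepath")
            btn = gr.Button("Send to next tab")
        with gr.Tab("Result", id="second_tab"):
            out = gr.Image()
    # Outputs mirror app.py's [image_i2v, tabs] pairing: an image plus the Tabs container.
    btn.click(fn=preview_then_switch, inputs=[inp], outputs=[out, tabs])

if __name__ == "__main__":
    demo.launch()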