DeepBeepMeep commited on
Commit
7009e87
·
1 Parent(s): f23c017

Added support for FLF2V

Browse files
Files changed (1) hide show
  1. wgp.py +36 -30
wgp.py CHANGED
@@ -266,7 +266,7 @@ def process_prompt_and_add_tasks(state, model_choice):
266
  }
267
  inputs.update(extra_inputs)
268
  add_video_task(**inputs)
269
- elif "image2video" in model_filename or "Fun_InP" in model_filename :
270
  image_prompt_type = inputs["image_prompt_type"]
271
 
272
  image_start = inputs["image_start"]
@@ -1366,7 +1366,7 @@ check_loras = args.check_loras ==1
1366
  advanced = args.advanced
1367
 
1368
  transformer_choices_t2v=["ckpts/wan2.1_text2video_1.3B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_quanto_int8.safetensors", "ckpts/wan2.1_Vace_1.3B_preview_bf16.safetensors", "ckpts/wan2.1_recammaster_1.3B_bf16.safetensors"]
1369
- transformer_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_bf16.safetensors", "ckpts/wan2.1_image2video_480p_14B_quanto_int8.safetensors", "ckpts/wan2.1_image2video_720p_14B_bf16.safetensors", "ckpts/wan2.1_image2video_720p_14B_quanto_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_quanto_int8.safetensors", ]
1370
  transformer_choices = transformer_choices_t2v + transformer_choices_i2v
1371
  text_encoder_choices = ["ckpts/models_t5_umt5-xxl-enc-bf16.safetensors", "ckpts/models_t5_umt5-xxl-enc-quanto_int8.safetensors"]
1372
  server_config_filename = "wgp_config.json"
@@ -1404,9 +1404,10 @@ else:
1404
  server_config = json.loads(text)
1405
 
1406
 
1407
- model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "fun_inp", "recam_1.3B"]
1408
  model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
1409
- "i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "recam_1.3B": "recammaster_1.3B" }
 
1410
 
1411
 
1412
  def get_model_type(model_filename):
@@ -1416,7 +1417,29 @@ def get_model_type(model_filename):
1416
  raise Exception("Unknown model:" + model_filename)
1417
 
1418
  def test_class_i2v(model_filename):
1419
- return "image2video" in model_filename or "Fun_InP" in model_filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1420
 
1421
 
1422
  def get_model_filename(model_type, quantization):
@@ -1444,12 +1467,12 @@ def get_default_settings(filename):
1444
  return "Several giant wooly mammoths approach treading through a snowy meadow, their long wooly fur lightly blows in the wind as they walk, snow covered trees and dramatic snow capped mountains in the distance, mid afternoon light with wispy clouds and a sun high in the distance creates a warm glow, the low camera view is stunning capturing the large furry mammal with beautiful photography, depth of field."
1445
  else:
1446
  return "A large orange octopus is seen resting on the bottom of the ocean floor, blending in with the sandy and rocky terrain. Its tentacles are spread out around its body, and its eyes are closed. The octopus is unaware of a king crab that is crawling towards it from behind a rock, its claws raised and ready to attack. The crab is brown and spiny, with long legs and antennae. The scene is captured from a wide angle, showing the vastness and depth of the ocean. The water is clear and blue, with rays of sunlight filtering through. The shot is sharp and crisp, with a high dynamic range. The octopus and the crab are in focus, while the background is slightly blurred, creating a depth of field effect."
1447
- i2v = "image2video" in filename or "Fun_InP" in filename
1448
  defaults_filename = get_settings_file_name(filename)
1449
  if not Path(defaults_filename).is_file():
1450
  ui_defaults = {
1451
  "prompt": get_default_prompt(i2v),
1452
- "resolution": "832x480",
1453
  "video_length": 81,
1454
  "num_inference_steps": 30,
1455
  "seed": -1,
@@ -1805,6 +1828,7 @@ def load_models(model_filename):
1805
 
1806
  major, minor = torch.cuda.get_device_capability(args.gpu if len(args.gpu) > 0 else None)
1807
  default_dtype = torch.float16 if major < 8 else torch.bfloat16
 
1808
  if default_dtype == torch.float16 or args.fp16:
1809
  print("Switching to f16 model as GPU architecture doesn't support bf16")
1810
  if "quanto" in model_filename:
@@ -1851,24 +1875,6 @@ def get_default_flow(filename, i2v):
1851
  return 7.0 if "480p" in filename and i2v else 5.0
1852
 
1853
 
1854
- def get_model_name(model_filename):
1855
- if "Fun" in model_filename:
1856
- model_name = "Fun InP image2video"
1857
- model_name += " 14B" if "14B" in model_filename else " 1.3B"
1858
- elif "Vace" in model_filename:
1859
- model_name = "Vace ControlNet"
1860
- model_name += " 14B" if "14B" in model_filename else " 1.3B"
1861
- elif "image" in model_filename:
1862
- model_name = "Wan2.1 image2video"
1863
- model_name += " 720p" if "720p" in model_filename else " 480p"
1864
- elif "recam" in model_filename:
1865
- model_name = "ReCamMaster"
1866
- model_name += " 14B" if "14B" in model_filename else " 1.3B"
1867
- else:
1868
- model_name = "Wan2.1 text2video"
1869
- model_name += " 14B" if "14B" in model_filename else " 1.3B"
1870
-
1871
- return model_name
1872
 
1873
 
1874
 
@@ -2594,7 +2600,7 @@ def generate_video(
2594
  slg_end = slg_end_perc/100,
2595
  cfg_star_switch = cfg_star_switch,
2596
  cfg_zero_step = cfg_zero_step,
2597
- add_frames_for_end_image = not "Fun_InP" in model_filename,
2598
  )
2599
  else:
2600
  samples = wan_model.generate(
@@ -3221,7 +3227,7 @@ def prepare_inputs_dict(target, inputs ):
3221
  if target == "settings":
3222
  return inputs
3223
 
3224
- if not any(k in model_filename for k in ["image2video", "Fun_InP"]):
3225
  inputs.pop("image_prompt_type")
3226
 
3227
 
@@ -3563,7 +3569,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3563
  if not update_form:
3564
  state = gr.State(state_dict)
3565
  trigger_refresh_input_type = gr.Text(interactive= False, visible= False)
3566
- with gr.Column(visible= "image2video" in model_filename or "Fun_InP" in model_filename ) as image_prompt_column:
3567
  image_prompt_type_value= ui_defaults.get("image_prompt_type","S")
3568
  image_prompt_type = gr.Radio( [("Use only a Start Image", "S"),("Use both a Start and an End Image", "SE")], value =image_prompt_type_value, label="Location", show_label= False, scale= 3)
3569
 
@@ -3676,7 +3682,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3676
  wizard_prompt_activated_var = gr.Text(wizard_prompt_activated, visible= False)
3677
  wizard_variables_var = gr.Text(wizard_variables, visible = False)
3678
  with gr.Row():
3679
- if "image2video" in model_filename or "Fun_InP" in model_filename:
3680
  resolution = gr.Dropdown(
3681
  choices=[
3682
  # 720p
@@ -4670,7 +4676,7 @@ def create_demo():
4670
  theme = gr.themes.Soft(font=["Verdana"], primary_hue="sky", neutral_hue="slate", text_size="md")
4671
 
4672
  with gr.Blocks(css=css, theme=theme, title= "Wan2GP") as demo:
4673
- gr.Markdown("<div align=center><H1>Wan<SUP>GP</SUP> v4.1 <FONT SIZE=4>by <I>DeepBeepMeep</I></FONT> <FONT SIZE=3>") # (<A HREF='https://github.com/deepbeepmeep/Wan2GP'>Updates</A>)</FONT SIZE=3></H1></div>")
4674
  global model_list
4675
 
4676
  tab_state = gr.State({ "tab_no":0 })
 
266
  }
267
  inputs.update(extra_inputs)
268
  add_video_task(**inputs)
269
+ elif test_class_i2v(model_filename) :
270
  image_prompt_type = inputs["image_prompt_type"]
271
 
272
  image_start = inputs["image_start"]
 
1366
  advanced = args.advanced
1367
 
1368
  transformer_choices_t2v=["ckpts/wan2.1_text2video_1.3B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_quanto_int8.safetensors", "ckpts/wan2.1_Vace_1.3B_preview_bf16.safetensors", "ckpts/wan2.1_recammaster_1.3B_bf16.safetensors"]
1369
+ transformer_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_bf16.safetensors", "ckpts/wan2.1_image2video_480p_14B_quanto_int8.safetensors", "ckpts/wan2.1_image2video_720p_14B_bf16.safetensors", "ckpts/wan2.1_image2video_720p_14B_quanto_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_quanto_int8.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_bf16.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_quanto_int8.safetensors"]
1370
  transformer_choices = transformer_choices_t2v + transformer_choices_i2v
1371
  text_encoder_choices = ["ckpts/models_t5_umt5-xxl-enc-bf16.safetensors", "ckpts/models_t5_umt5-xxl-enc-quanto_int8.safetensors"]
1372
  server_config_filename = "wgp_config.json"
 
1404
  server_config = json.loads(text)
1405
 
1406
 
1407
+ model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "fun_inp", "recam_1.3B", "flf2v_720p"]
1408
  model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
1409
+ "i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "recam_1.3B": "recammaster_1.3B",
1410
+ "flf2v_720p" : "FLF2V_720p" }
1411
 
1412
 
1413
  def get_model_type(model_filename):
 
1417
  raise Exception("Unknown model:" + model_filename)
1418
 
1419
  def test_class_i2v(model_filename):
1420
+ return "image2video" in model_filename or "Fun_InP" in model_filename or "FLF2V" in model_filename
1421
+
1422
+ def get_model_name(model_filename):
1423
+ if "Fun" in model_filename:
1424
+ model_name = "Fun InP image2video"
1425
+ model_name += " 14B" if "14B" in model_filename else " 1.3B"
1426
+ elif "Vace" in model_filename:
1427
+ model_name = "Vace ControlNet"
1428
+ model_name += " 14B" if "14B" in model_filename else " 1.3B"
1429
+ elif "image" in model_filename:
1430
+ model_name = "Wan2.1 image2video"
1431
+ model_name += " 720p" if "720p" in model_filename else " 480p"
1432
+ elif "recam" in model_filename:
1433
+ model_name = "ReCamMaster"
1434
+ model_name += " 14B" if "14B" in model_filename else " 1.3B"
1435
+ elif "FLF2V" in model_filename:
1436
+ model_name = "Wan2.1 FLF2V"
1437
+ model_name += " 720p" if "720p" in model_filename else " 480p"
1438
+ else:
1439
+ model_name = "Wan2.1 text2video"
1440
+ model_name += " 14B" if "14B" in model_filename else " 1.3B"
1441
+
1442
+ return model_name
1443
 
1444
 
1445
  def get_model_filename(model_type, quantization):
 
1467
  return "Several giant wooly mammoths approach treading through a snowy meadow, their long wooly fur lightly blows in the wind as they walk, snow covered trees and dramatic snow capped mountains in the distance, mid afternoon light with wispy clouds and a sun high in the distance creates a warm glow, the low camera view is stunning capturing the large furry mammal with beautiful photography, depth of field."
1468
  else:
1469
  return "A large orange octopus is seen resting on the bottom of the ocean floor, blending in with the sandy and rocky terrain. Its tentacles are spread out around its body, and its eyes are closed. The octopus is unaware of a king crab that is crawling towards it from behind a rock, its claws raised and ready to attack. The crab is brown and spiny, with long legs and antennae. The scene is captured from a wide angle, showing the vastness and depth of the ocean. The water is clear and blue, with rays of sunlight filtering through. The shot is sharp and crisp, with a high dynamic range. The octopus and the crab are in focus, while the background is slightly blurred, creating a depth of field effect."
1470
+ i2v = test_class_i2v(filename)
1471
  defaults_filename = get_settings_file_name(filename)
1472
  if not Path(defaults_filename).is_file():
1473
  ui_defaults = {
1474
  "prompt": get_default_prompt(i2v),
1475
+ "resolution": "1280x720" if "720p" in filename else "832x480",
1476
  "video_length": 81,
1477
  "num_inference_steps": 30,
1478
  "seed": -1,
 
1828
 
1829
  major, minor = torch.cuda.get_device_capability(args.gpu if len(args.gpu) > 0 else None)
1830
  default_dtype = torch.float16 if major < 8 else torch.bfloat16
1831
+ # default_dtype = torch.bfloat16
1832
  if default_dtype == torch.float16 or args.fp16:
1833
  print("Switching to f16 model as GPU architecture doesn't support bf16")
1834
  if "quanto" in model_filename:
 
1875
  return 7.0 if "480p" in filename and i2v else 5.0
1876
 
1877
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1878
 
1879
 
1880
 
 
2600
  slg_end = slg_end_perc/100,
2601
  cfg_star_switch = cfg_star_switch,
2602
  cfg_zero_step = cfg_zero_step,
2603
+ add_frames_for_end_image = "image2video" in model_filename
2604
  )
2605
  else:
2606
  samples = wan_model.generate(
 
3227
  if target == "settings":
3228
  return inputs
3229
 
3230
+ if not test_class_i2v(model_filename):
3231
  inputs.pop("image_prompt_type")
3232
 
3233
 
 
3569
  if not update_form:
3570
  state = gr.State(state_dict)
3571
  trigger_refresh_input_type = gr.Text(interactive= False, visible= False)
3572
+ with gr.Column(visible= test_class_i2v(model_filename) ) as image_prompt_column:
3573
  image_prompt_type_value= ui_defaults.get("image_prompt_type","S")
3574
  image_prompt_type = gr.Radio( [("Use only a Start Image", "S"),("Use both a Start and an End Image", "SE")], value =image_prompt_type_value, label="Location", show_label= False, scale= 3)
3575
 
 
3682
  wizard_prompt_activated_var = gr.Text(wizard_prompt_activated, visible= False)
3683
  wizard_variables_var = gr.Text(wizard_variables, visible = False)
3684
  with gr.Row():
3685
+ if test_class_i2v(model_filename):
3686
  resolution = gr.Dropdown(
3687
  choices=[
3688
  # 720p
 
4676
  theme = gr.themes.Soft(font=["Verdana"], primary_hue="sky", neutral_hue="slate", text_size="md")
4677
 
4678
  with gr.Blocks(css=css, theme=theme, title= "Wan2GP") as demo:
4679
+ gr.Markdown("<div align=center><H1>Wan<SUP>GP</SUP> v4.2 <FONT SIZE=4>by <I>DeepBeepMeep</I></FONT> <FONT SIZE=3>") # (<A HREF='https://github.com/deepbeepmeep/Wan2GP'>Updates</A>)</FONT SIZE=3></H1></div>")
4680
  global model_list
4681
 
4682
  tab_state = gr.State({ "tab_no":0 })