Commit
·
7009e87
1
Parent(s):
f23c017
Added support for FLF2V
Browse files
wgp.py
CHANGED
|
@@ -266,7 +266,7 @@ def process_prompt_and_add_tasks(state, model_choice):
|
|
| 266 |
}
|
| 267 |
inputs.update(extra_inputs)
|
| 268 |
add_video_task(**inputs)
|
| 269 |
-
elif
|
| 270 |
image_prompt_type = inputs["image_prompt_type"]
|
| 271 |
|
| 272 |
image_start = inputs["image_start"]
|
|
@@ -1366,7 +1366,7 @@ check_loras = args.check_loras ==1
|
|
| 1366 |
advanced = args.advanced
|
| 1367 |
|
| 1368 |
transformer_choices_t2v=["ckpts/wan2.1_text2video_1.3B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_quanto_int8.safetensors", "ckpts/wan2.1_Vace_1.3B_preview_bf16.safetensors", "ckpts/wan2.1_recammaster_1.3B_bf16.safetensors"]
|
| 1369 |
-
transformer_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_bf16.safetensors", "ckpts/wan2.1_image2video_480p_14B_quanto_int8.safetensors", "ckpts/wan2.1_image2video_720p_14B_bf16.safetensors", "ckpts/wan2.1_image2video_720p_14B_quanto_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_quanto_int8.safetensors", ]
|
| 1370 |
transformer_choices = transformer_choices_t2v + transformer_choices_i2v
|
| 1371 |
text_encoder_choices = ["ckpts/models_t5_umt5-xxl-enc-bf16.safetensors", "ckpts/models_t5_umt5-xxl-enc-quanto_int8.safetensors"]
|
| 1372 |
server_config_filename = "wgp_config.json"
|
|
@@ -1404,9 +1404,10 @@ else:
|
|
| 1404 |
server_config = json.loads(text)
|
| 1405 |
|
| 1406 |
|
| 1407 |
-
model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "fun_inp", "recam_1.3B"]
|
| 1408 |
model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
|
| 1409 |
-
"i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "recam_1.3B": "recammaster_1.3B"
|
|
|
|
| 1410 |
|
| 1411 |
|
| 1412 |
def get_model_type(model_filename):
|
|
@@ -1416,7 +1417,29 @@ def get_model_type(model_filename):
|
|
| 1416 |
raise Exception("Unknown model:" + model_filename)
|
| 1417 |
|
| 1418 |
def test_class_i2v(model_filename):
|
| 1419 |
-
return "image2video" in model_filename or "Fun_InP" in model_filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1420 |
|
| 1421 |
|
| 1422 |
def get_model_filename(model_type, quantization):
|
|
@@ -1444,12 +1467,12 @@ def get_default_settings(filename):
|
|
| 1444 |
return "Several giant wooly mammoths approach treading through a snowy meadow, their long wooly fur lightly blows in the wind as they walk, snow covered trees and dramatic snow capped mountains in the distance, mid afternoon light with wispy clouds and a sun high in the distance creates a warm glow, the low camera view is stunning capturing the large furry mammal with beautiful photography, depth of field."
|
| 1445 |
else:
|
| 1446 |
return "A large orange octopus is seen resting on the bottom of the ocean floor, blending in with the sandy and rocky terrain. Its tentacles are spread out around its body, and its eyes are closed. The octopus is unaware of a king crab that is crawling towards it from behind a rock, its claws raised and ready to attack. The crab is brown and spiny, with long legs and antennae. The scene is captured from a wide angle, showing the vastness and depth of the ocean. The water is clear and blue, with rays of sunlight filtering through. The shot is sharp and crisp, with a high dynamic range. The octopus and the crab are in focus, while the background is slightly blurred, creating a depth of field effect."
|
| 1447 |
-
i2v =
|
| 1448 |
defaults_filename = get_settings_file_name(filename)
|
| 1449 |
if not Path(defaults_filename).is_file():
|
| 1450 |
ui_defaults = {
|
| 1451 |
"prompt": get_default_prompt(i2v),
|
| 1452 |
-
"resolution": "832x480",
|
| 1453 |
"video_length": 81,
|
| 1454 |
"num_inference_steps": 30,
|
| 1455 |
"seed": -1,
|
|
@@ -1805,6 +1828,7 @@ def load_models(model_filename):
|
|
| 1805 |
|
| 1806 |
major, minor = torch.cuda.get_device_capability(args.gpu if len(args.gpu) > 0 else None)
|
| 1807 |
default_dtype = torch.float16 if major < 8 else torch.bfloat16
|
|
|
|
| 1808 |
if default_dtype == torch.float16 or args.fp16:
|
| 1809 |
print("Switching to f16 model as GPU architecture doesn't support bf16")
|
| 1810 |
if "quanto" in model_filename:
|
|
@@ -1851,24 +1875,6 @@ def get_default_flow(filename, i2v):
|
|
| 1851 |
return 7.0 if "480p" in filename and i2v else 5.0
|
| 1852 |
|
| 1853 |
|
| 1854 |
-
def get_model_name(model_filename):
|
| 1855 |
-
if "Fun" in model_filename:
|
| 1856 |
-
model_name = "Fun InP image2video"
|
| 1857 |
-
model_name += " 14B" if "14B" in model_filename else " 1.3B"
|
| 1858 |
-
elif "Vace" in model_filename:
|
| 1859 |
-
model_name = "Vace ControlNet"
|
| 1860 |
-
model_name += " 14B" if "14B" in model_filename else " 1.3B"
|
| 1861 |
-
elif "image" in model_filename:
|
| 1862 |
-
model_name = "Wan2.1 image2video"
|
| 1863 |
-
model_name += " 720p" if "720p" in model_filename else " 480p"
|
| 1864 |
-
elif "recam" in model_filename:
|
| 1865 |
-
model_name = "ReCamMaster"
|
| 1866 |
-
model_name += " 14B" if "14B" in model_filename else " 1.3B"
|
| 1867 |
-
else:
|
| 1868 |
-
model_name = "Wan2.1 text2video"
|
| 1869 |
-
model_name += " 14B" if "14B" in model_filename else " 1.3B"
|
| 1870 |
-
|
| 1871 |
-
return model_name
|
| 1872 |
|
| 1873 |
|
| 1874 |
|
|
@@ -2594,7 +2600,7 @@ def generate_video(
|
|
| 2594 |
slg_end = slg_end_perc/100,
|
| 2595 |
cfg_star_switch = cfg_star_switch,
|
| 2596 |
cfg_zero_step = cfg_zero_step,
|
| 2597 |
-
add_frames_for_end_image =
|
| 2598 |
)
|
| 2599 |
else:
|
| 2600 |
samples = wan_model.generate(
|
|
@@ -3221,7 +3227,7 @@ def prepare_inputs_dict(target, inputs ):
|
|
| 3221 |
if target == "settings":
|
| 3222 |
return inputs
|
| 3223 |
|
| 3224 |
-
if not
|
| 3225 |
inputs.pop("image_prompt_type")
|
| 3226 |
|
| 3227 |
|
|
@@ -3563,7 +3569,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
|
|
| 3563 |
if not update_form:
|
| 3564 |
state = gr.State(state_dict)
|
| 3565 |
trigger_refresh_input_type = gr.Text(interactive= False, visible= False)
|
| 3566 |
-
with gr.Column(visible=
|
| 3567 |
image_prompt_type_value= ui_defaults.get("image_prompt_type","S")
|
| 3568 |
image_prompt_type = gr.Radio( [("Use only a Start Image", "S"),("Use both a Start and an End Image", "SE")], value =image_prompt_type_value, label="Location", show_label= False, scale= 3)
|
| 3569 |
|
|
@@ -3676,7 +3682,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
|
|
| 3676 |
wizard_prompt_activated_var = gr.Text(wizard_prompt_activated, visible= False)
|
| 3677 |
wizard_variables_var = gr.Text(wizard_variables, visible = False)
|
| 3678 |
with gr.Row():
|
| 3679 |
-
if
|
| 3680 |
resolution = gr.Dropdown(
|
| 3681 |
choices=[
|
| 3682 |
# 720p
|
|
@@ -4670,7 +4676,7 @@ def create_demo():
|
|
| 4670 |
theme = gr.themes.Soft(font=["Verdana"], primary_hue="sky", neutral_hue="slate", text_size="md")
|
| 4671 |
|
| 4672 |
with gr.Blocks(css=css, theme=theme, title= "Wan2GP") as demo:
|
| 4673 |
-
gr.Markdown("<div align=center><H1>Wan<SUP>GP</SUP> v4.
|
| 4674 |
global model_list
|
| 4675 |
|
| 4676 |
tab_state = gr.State({ "tab_no":0 })
|
|
|
|
| 266 |
}
|
| 267 |
inputs.update(extra_inputs)
|
| 268 |
add_video_task(**inputs)
|
| 269 |
+
elif test_class_i2v(model_filename) :
|
| 270 |
image_prompt_type = inputs["image_prompt_type"]
|
| 271 |
|
| 272 |
image_start = inputs["image_start"]
|
|
|
|
| 1366 |
advanced = args.advanced
|
| 1367 |
|
| 1368 |
transformer_choices_t2v=["ckpts/wan2.1_text2video_1.3B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_quanto_int8.safetensors", "ckpts/wan2.1_Vace_1.3B_preview_bf16.safetensors", "ckpts/wan2.1_recammaster_1.3B_bf16.safetensors"]
|
| 1369 |
+
transformer_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_bf16.safetensors", "ckpts/wan2.1_image2video_480p_14B_quanto_int8.safetensors", "ckpts/wan2.1_image2video_720p_14B_bf16.safetensors", "ckpts/wan2.1_image2video_720p_14B_quanto_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_quanto_int8.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_bf16.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_quanto_int8.safetensors"]
|
| 1370 |
transformer_choices = transformer_choices_t2v + transformer_choices_i2v
|
| 1371 |
text_encoder_choices = ["ckpts/models_t5_umt5-xxl-enc-bf16.safetensors", "ckpts/models_t5_umt5-xxl-enc-quanto_int8.safetensors"]
|
| 1372 |
server_config_filename = "wgp_config.json"
|
|
|
|
| 1404 |
server_config = json.loads(text)
|
| 1405 |
|
| 1406 |
|
| 1407 |
+
model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "fun_inp", "recam_1.3B", "flf2v_720p"]
|
| 1408 |
model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
|
| 1409 |
+
"i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "recam_1.3B": "recammaster_1.3B",
|
| 1410 |
+
"flf2v_720p" : "FLF2V_720p" }
|
| 1411 |
|
| 1412 |
|
| 1413 |
def get_model_type(model_filename):
|
|
|
|
| 1417 |
raise Exception("Unknown model:" + model_filename)
|
| 1418 |
|
| 1419 |
def test_class_i2v(model_filename):
|
| 1420 |
+
return "image2video" in model_filename or "Fun_InP" in model_filename or "FLF2V" in model_filename
|
| 1421 |
+
|
| 1422 |
+
def get_model_name(model_filename):
|
| 1423 |
+
if "Fun" in model_filename:
|
| 1424 |
+
model_name = "Fun InP image2video"
|
| 1425 |
+
model_name += " 14B" if "14B" in model_filename else " 1.3B"
|
| 1426 |
+
elif "Vace" in model_filename:
|
| 1427 |
+
model_name = "Vace ControlNet"
|
| 1428 |
+
model_name += " 14B" if "14B" in model_filename else " 1.3B"
|
| 1429 |
+
elif "image" in model_filename:
|
| 1430 |
+
model_name = "Wan2.1 image2video"
|
| 1431 |
+
model_name += " 720p" if "720p" in model_filename else " 480p"
|
| 1432 |
+
elif "recam" in model_filename:
|
| 1433 |
+
model_name = "ReCamMaster"
|
| 1434 |
+
model_name += " 14B" if "14B" in model_filename else " 1.3B"
|
| 1435 |
+
elif "FLF2V" in model_filename:
|
| 1436 |
+
model_name = "Wan2.1 FLF2V"
|
| 1437 |
+
model_name += " 720p" if "720p" in model_filename else " 480p"
|
| 1438 |
+
else:
|
| 1439 |
+
model_name = "Wan2.1 text2video"
|
| 1440 |
+
model_name += " 14B" if "14B" in model_filename else " 1.3B"
|
| 1441 |
+
|
| 1442 |
+
return model_name
|
| 1443 |
|
| 1444 |
|
| 1445 |
def get_model_filename(model_type, quantization):
|
|
|
|
| 1467 |
return "Several giant wooly mammoths approach treading through a snowy meadow, their long wooly fur lightly blows in the wind as they walk, snow covered trees and dramatic snow capped mountains in the distance, mid afternoon light with wispy clouds and a sun high in the distance creates a warm glow, the low camera view is stunning capturing the large furry mammal with beautiful photography, depth of field."
|
| 1468 |
else:
|
| 1469 |
return "A large orange octopus is seen resting on the bottom of the ocean floor, blending in with the sandy and rocky terrain. Its tentacles are spread out around its body, and its eyes are closed. The octopus is unaware of a king crab that is crawling towards it from behind a rock, its claws raised and ready to attack. The crab is brown and spiny, with long legs and antennae. The scene is captured from a wide angle, showing the vastness and depth of the ocean. The water is clear and blue, with rays of sunlight filtering through. The shot is sharp and crisp, with a high dynamic range. The octopus and the crab are in focus, while the background is slightly blurred, creating a depth of field effect."
|
| 1470 |
+
i2v = test_class_i2v(filename)
|
| 1471 |
defaults_filename = get_settings_file_name(filename)
|
| 1472 |
if not Path(defaults_filename).is_file():
|
| 1473 |
ui_defaults = {
|
| 1474 |
"prompt": get_default_prompt(i2v),
|
| 1475 |
+
"resolution": "1280x720" if "720p" in filename else "832x480",
|
| 1476 |
"video_length": 81,
|
| 1477 |
"num_inference_steps": 30,
|
| 1478 |
"seed": -1,
|
|
|
|
| 1828 |
|
| 1829 |
major, minor = torch.cuda.get_device_capability(args.gpu if len(args.gpu) > 0 else None)
|
| 1830 |
default_dtype = torch.float16 if major < 8 else torch.bfloat16
|
| 1831 |
+
# default_dtype = torch.bfloat16
|
| 1832 |
if default_dtype == torch.float16 or args.fp16:
|
| 1833 |
print("Switching to f16 model as GPU architecture doesn't support bf16")
|
| 1834 |
if "quanto" in model_filename:
|
|
|
|
| 1875 |
return 7.0 if "480p" in filename and i2v else 5.0
|
| 1876 |
|
| 1877 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1878 |
|
| 1879 |
|
| 1880 |
|
|
|
|
| 2600 |
slg_end = slg_end_perc/100,
|
| 2601 |
cfg_star_switch = cfg_star_switch,
|
| 2602 |
cfg_zero_step = cfg_zero_step,
|
| 2603 |
+
add_frames_for_end_image = "image2video" in model_filename
|
| 2604 |
)
|
| 2605 |
else:
|
| 2606 |
samples = wan_model.generate(
|
|
|
|
| 3227 |
if target == "settings":
|
| 3228 |
return inputs
|
| 3229 |
|
| 3230 |
+
if not test_class_i2v(model_filename):
|
| 3231 |
inputs.pop("image_prompt_type")
|
| 3232 |
|
| 3233 |
|
|
|
|
| 3569 |
if not update_form:
|
| 3570 |
state = gr.State(state_dict)
|
| 3571 |
trigger_refresh_input_type = gr.Text(interactive= False, visible= False)
|
| 3572 |
+
with gr.Column(visible= test_class_i2v(model_filename) ) as image_prompt_column:
|
| 3573 |
image_prompt_type_value= ui_defaults.get("image_prompt_type","S")
|
| 3574 |
image_prompt_type = gr.Radio( [("Use only a Start Image", "S"),("Use both a Start and an End Image", "SE")], value =image_prompt_type_value, label="Location", show_label= False, scale= 3)
|
| 3575 |
|
|
|
|
| 3682 |
wizard_prompt_activated_var = gr.Text(wizard_prompt_activated, visible= False)
|
| 3683 |
wizard_variables_var = gr.Text(wizard_variables, visible = False)
|
| 3684 |
with gr.Row():
|
| 3685 |
+
if test_class_i2v(model_filename):
|
| 3686 |
resolution = gr.Dropdown(
|
| 3687 |
choices=[
|
| 3688 |
# 720p
|
|
|
|
| 4676 |
theme = gr.themes.Soft(font=["Verdana"], primary_hue="sky", neutral_hue="slate", text_size="md")
|
| 4677 |
|
| 4678 |
with gr.Blocks(css=css, theme=theme, title= "Wan2GP") as demo:
|
| 4679 |
+
gr.Markdown("<div align=center><H1>Wan<SUP>GP</SUP> v4.2 <FONT SIZE=4>by <I>DeepBeepMeep</I></FONT> <FONT SIZE=3>") # (<A HREF='https://github.com/deepbeepmeep/Wan2GP'>Updates</A>)</FONT SIZE=3></H1></div>")
|
| 4680 |
global model_list
|
| 4681 |
|
| 4682 |
tab_state = gr.State({ "tab_no":0 })
|