{
  "config": "./config/mmvid_edit_config.yaml",
  "resume": null,
  "save_model": "./results/edit_2025_07_07_08_55_seed42_ema-1_mmvid/model",
  "save_mode": "best",
  "res_root_dir": "./results",
  "debug": false,
  "seed": 42,
  "no_cuda": false,
  "no_pin_memory": true,
  "cuda": true,
  "dalle_param": {
    "vae": {
      "which_vae": "vqgan1024",
      "vae_path": "./pretrained_vqgan/edit_epoch=000050.ckpt",
      "image_size": 224
    },
    "bert": {
      "num_text_tokens": 0,
      "text_seq_len": 24,
      "dim": 768,
      "loss_img_weight": 7,
      "text_feature_dim": 0,
      "fixed_language_model": null,
      "text_emb_bottleneck": null,
      "which_transformer": "openai_clip_visual",
      "num_targets": 4,
      "num_visuals": 0,
      "beit": true,
      "use_separate_visual_emb": false,
      "insert_sep": false,
      "openai_clip_path": "./ckpt/ViT-B-32.pt",
      "vision_layers": 12
    },
    "skip_params": [
      "to_logits_vid.1.bias",
      "to_logits_vid.1.weight",
      "to_logits_vid.0.bias",
      "to_logits_vid.0.weight",
      "to_logits_rel.1.bias",
      "to_logits_rel.1.weight",
      "to_logits_rel.0.bias",
      "to_logits_rel.0.weight",
      "to_logits.1.bias",
      "to_logits.1.weight",
      "to_logits.0.bias",
      "to_logits.0.weight",
      "to_logits_text.1.bias",
      "to_logits_text.1.weight",
      "to_logits_text.0.bias",
      "to_logits_text.0.weight",
      "image_emb.weight"
    ],
    "freeze": false,
    "use_lora": false,
    "lora_config": {
      "r": 8,
      "lora_alpha": 16,
      "lora_dropout": 0.1,
      "bias": "none"
    }
  },
  "decoder_param": {
    "max_n_sen": 12,
    "max_t_len": 24,
    "max_v_len": 4,
    "exp_id": "init",
    "hidden_size": 512,
    "intermediate_size": 2048,
    "num_hidden_layers": 2,
    "num_attention_heads": 8,
    "mask_prob": 0.0,
    "hidden_dropout_prob": 0.1,
    "label_smoothing": 0.1,
    "recurrent": false,
    "untied": false,
    "mtrans": true,
    "use_beam": false,
    "vocab_size": 524,
    "mask_token_id": 7
  },
  "dset_name": "edit",
  "data_dir": "/home/sunjiayang/VFI4IDC_test/IDC_scratch_model/densevid_eval/edit_data",
  "video_feature_dir": "./data/edit/IER_processed",
  "word2idx_path": "./cache/edit_word2idx2.json",
  "glove_path": "./cache/yc2_vocab_glove.pt",
  "eval_tool_dir": "/home/sunjiayang/VFI4IDC_test/IDC_scratch_model/densevid_eval",
  "filtered": true,
  "filter_file_path": "./filter_files/edit_similarity_scores.json",
  "max_k": 5,
  "num_frames": 9,
  "recurrent": false,
  "untied": false,
  "mtrans": true,
  "use_beam": false,
  "image_size": 224,
  "n_epoch": 40,
  "batch_size": 16,
  "val_batch_size": 32,
  "max_es_cnt": 50,
  "lr": 5e-05,
  "lr_finetune": 5e-05,
  "lr_warmup_proportion": 0.1,
  "grad_clip": 1,
  "weight_decay": 0.01,
  "ema_decay": -1,
  "num_workers": 4,
  "temperature": 0.5,
  "metric_reference": "CIDEr",
  "pretrained_model": "./ckpt/img_size224_layer12_edit_wovisual_beit_softmax/dalle.pt",
  "res_dir": "./results/edit_2025_07_07_08_55_seed42_ema-1_mmvid",
  "log": "./results/edit_2025_07_07_08_55_seed42_ema-1_mmvid/model",
  "pin_memory": false
}