VLog / args.json
KevinQHLin's picture
Upload args.json with huggingface_hub
fcfd8fd verified
{
"exp_id": "vidcab_ret_m",
"debug": false,
"llm_model": "gpt2-medium",
"llm_8bit": false,
"freeze_lm": false,
"lora": false,
"vis_model": "openai/clip-vit-base-patch32",
"precision": "bf16",
"n_visual_tokens": 1,
"rand_init": false,
"num_layers": -1,
"hidden_dim": 1152,
"nheads": 8,
"dim_feedforward": 2048,
"dropout": 0.1,
"droppath": 0.1,
"vis_pooling": false,
"vis_query_pooling": false,
"last_vis_mean": false,
"vocab_model": "google/siglip-so400m-patch14-384",
"dataset_dir": "/blob/v-lqinghong/data/Ego_database",
"log_base_dir": "/blob/v-lqinghong/experiments/VLog",
"dataset": "ret",
"metadata": "egoclip_vidcab",
"fullset": false,
"val_dataset": "ret",
"val_metadata": "egoclip_vidcab",
"workers": 16,
"visual_input": "feature",
"image_size": 224,
"num_frame": 1,
"add_special_tokens": false,
"num_history": 0,
"past_len": 0,
"train_narrator": "narration_pass_2",
"add_eos": true,
"max_len": 128,
"max_len_eval": 128,
"max_clip_len": 128,
"temperature": 0.7,
"epochs": 10,
"start_epoch": 0,
"steps_per_epoch": -1,
"val_steps_per_epoch": -1,
"batch_size": 32,
"val_batch_size": 32,
"print_freq": 1,
"resume": "",
"evaluate": false,
"only_best": true,
"do_sample": false,
"lock_lm": false,
"lr": 0.0003,
"beta1": 0.9,
"beta2": 0.95,
"weight_decay": 0.0,
"lr_warmup_steps": 100,
"lr_schedule_step_size": 10,
"lr_schedule_gamma": 0.1,
"grad_accumulation_steps": 1,
"grad_clip": 1.0,
"loss": "nce",
"nce_temperature": 0.05,
"scale_lm_loss": 1.0,
"scale_ret_loss": 1.0,
"train_class": false,
"main_node": true,
"world_size": -1,
"local_rank": -1,
"dist_url": "tcp://localhost:44122",
"dist_backend": "nccl",
"seed": null,
"gpu": null,
"multiprocessing_distributed": false,
"log_dir": "/blob/v-lqinghong/experiments/VLog/vidcab_ret_m/2025-03-08_12-48-28"
}