# LCL_2WAY_WEIGHT / cfg.py
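# Training split of the ImageNet-1K in-context-learning (ICL) pairs.
# The paths below point at the authors' cluster storage (JSONL
# annotations, raw images, and the ICL prompt template) and must be
# remapped to local copies before the config can be used elsewhere.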
IMAGENET1K_TRAIN = dict(
type='ImageNet1kDatasetTrain',
filename=
'/mnt/lustre/share_data/taiyan/dataset/imagenet1k/train900_pairs.jsonl',
image_folder='/mnt/lustre/share_data/taiyan/dataset/ImageNet-1K',
template_file=
'/mnt/cache/taiyan/unify_mllm/config/_base_/dataset/template/ICL.json')
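# The same dataset definition again, registered under the key
# 'imagenet1k_train' as the default training variant.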
DEFAULT_TRAIN_IMAGENET1K_VARIANT = dict(
imagenet1k_train=dict(
type='ImageNet1kDatasetTrain',
filename=
'/mnt/lustre/share_data/taiyan/dataset/imagenet1k/train900_pairs.jsonl',
image_folder='/mnt/lustre/share_data/taiyan/dataset/ImageNet-1K',
template_file=
'/mnt/cache/taiyan/unify_mllm/config/_base_/dataset/template/ICL.json')
)
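# Model configuration: a LLaVA v1 checkpoint paired with an OpenAI
# CLIP ViT-L/14 vision tower. Vision features are taken from the
# second-to-last layer (mm_vision_select_layer=-2), each image is
# represented by 256 tokens, and image start/end markers are enabled.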
model_args = dict(
type='llava',
version='v1',
cache_dir=None,
model_name_or_path=
'/mnt/lustre/share_data/xiechi/misc/to_weichen/llava_pretrain_final19/checkpoint-44000/',
vision_tower=
'/mnt/lustre/share_data/chenkeqin/VG/ckpt/openai/clip-vit-large-patch14',
pretrain_mm_mlp_adapter=None,
mm_vision_select_layer=-2,
model_max_length=30000,
freeze_backbone=False,
tune_mm_mlp_adapter=False,
freeze_mm_mlp_adapter=False,
freeze_mm_projector=False,
is_multimodal=True,
sep_image_conv_front=False,
image_token_len=256,
mm_use_im_start_end=True,
target_processor=dict(boxes=dict(type='PlainBoxFormatter')),
process_func_args=dict(
conv=dict(type='LLavaConvProcessV1'),
target=dict(type='BoxFormatProcess'),
text=dict(type='LlavaTextProcessV2'),
image=dict(type='LlavaImageProcessorV1')),
conv_args=dict(
conv_template=[
'hypnotized_v1.0', 'hypnotized_v1.1', 'hypnotized_ans_v1.0',
'vicuna_v1.1', 'causal_v1.0', 'final_v1.0'
],
transforms=dict(type='Expand2square'),
tokenize_kwargs=dict(truncation_size=2048)),
gen_kwargs_set_pad_token_id=True,
gen_kwargs_set_bos_token_id=True,
gen_kwargs_set_eos_token_id=True)
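# HuggingFace-style training arguments: fp16 training under FSDP
# (full shard, auto-wrapping LlamaDecoderLayer) with gradient
# checkpointing, cosine LR decay from 2e-5 with 3% warmup, 50 epochs,
# and a checkpoint every 500 steps. Evaluation is disabled.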
training_args = dict(
output_dir='/mnt/cache/taiyan/unify_mllm/checkpoints/2way_weight',
overwrite_output_dir=True,
report_to='none',
seed=42,
remove_unused_columns=False,
do_train=True,
per_device_train_batch_size=1,
gradient_accumulation_steps=1,
num_train_epochs=50,
learning_rate=2e-05,
lr_scheduler_type='cosine',
weight_decay=0.0,
warmup_ratio=0.03,
evaluation_strategy='no',
tf32=False,
bf16=False,
gradient_checkpointing=True,
fsdp='full_shard auto_wrap',
fsdp_transformer_layer_cls_to_wrap='LlamaDecoderLayer',
logging_steps=10,
save_strategy='steps',
save_steps=500,
do_eval=False,
do_predict=False,
predict_with_generate=True,
per_device_eval_batch_size=8,
dataloader_num_workers=4,
fp16=True)
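# Data configuration. Only the train split is populated; it reuses the
# ImageNet-1K training set above with the 'policy_2way_weight' policy
# (the 2-way weighted variant this config is named after), with
# in-context learning enabled at 8 shots.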
data_args = dict(
train=dict(
type='ImageNet1kDatasetTrain',
filename=
'/mnt/lustre/share_data/taiyan/dataset/imagenet1k/train900_pairs.jsonl',
image_folder='/mnt/lustre/share_data/taiyan/dataset/ImageNet-1K',
template_file=
'/mnt/cache/taiyan/unify_mllm/config/_base_/dataset/template/ICL.json',
policy='policy_2way_weight'),
validation=None,
test=None,
compute_metric=None,
collator_kwargs=dict(padding=True, max_length=1024),
gen_kwargs=dict(max_new_tokens=1024, num_beams=1),
use_icl=True,
shot=8)
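# Minimal usage sketch (an assumption, not shown in this file: the
# config is consumed by an mmcv/mmengine-style loader, so the exact
# entry point may differ). Kept as comments so the module stays a
# plain config file:
#
#     from mmengine.config import Config
#
#     cfg = Config.fromfile('cfg.py')
#     print(cfg.model_args['type'])            # 'llava'
#     print(cfg.data_args['train']['policy'])  # 'policy_2way_weight'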