# RxnIM / rxn /model /cfg.py
# CYF200127's picture
# Upload 12 files
# 092aa0d verified
# Dataset configs for testing, keyed by a short dataset name. Each entry
# gives an annotation file, an image folder, a prompt-template file and the
# dataset `type` string.
DEFAULT_TEST_DATASET = dict(
    flickr=dict(
        filename='./reactiondata/real_test.jsonl',
        image_folder='./reaction_image',
        template_file='./config/_base_/dataset/template/reaction.json',
        type='FlickrDataset',
    ),
    # NOTE(review): this test entry points at a file named train_OCR.jsonl;
    # confirm that a training file is intended here.
    reg=dict(
        filename='./reactiondata/train_OCR.jsonl',
        image_folder='./reaction_image_OCR',
        template_file='./config/_base_/dataset/template/OCR.json',
        type='REGDataset',
    ),
)
# Dataset configs for training, keyed by a short dataset name. Each entry
# gives an annotation file, an image folder, a prompt-template file and the
# dataset `type` string.
DEFAULT_TRAIN_DATASET = dict(
    flickr=dict(
        filename='./reactiondata/reaction_real_structed.jsonl',
        image_folder='./reaction_image',
        template_file='./config/_base_/dataset/template/reaction.json',
        type='FlickrDataset',
    ),
    reg=dict(
        filename='./reactiondata/train_OCR.jsonl',
        image_folder='./reaction_image_OCR',
        template_file='./config/_base_/dataset/template/OCR.json',
        type='REGDataset',
    ),
)
# Data pipeline settings: collator and generation kwargs plus the
# train / validation / test dataset definitions.
data_args = dict(
    collator_kwargs=dict(max_length=1024, padding=True),
    compute_metric=None,
    gen_kwargs=dict(max_new_tokens=1024, num_beams=1),
    # No test split is configured here.
    test=None,
    train=dict(
        cfgs=[
            # Entry 0: OCR data.
            dict(
                filename='./reactiondata/train_OCR.jsonl',
                image_folder='./reaction_image_OCR',
                template_file='./config/_base_/dataset/template/OCR.json',
                type='REGDataset',
            ),
            # Entry 1: reaction data.
            dict(
                filename='./reactiondata/reaction_real_structed.jsonl',
                image_folder='./reaction_image',
                template_file='./config/_base_/dataset/template/reaction.json',
                type='FlickrDataset',
            ),
        ],
        # One value per entry in `cfgs`, in the same order. Presumably these
        # are sampling weights, in which case the OCR entry (0.0) is never
        # drawn -- verify against the dataset implementation.
        probabilities=[
            0.0,
            1,
        ],
        seed=None,
        stopping_strategy='first_exhausted',
        # NOTE(review): the spelling 'InterleaveDateset' is a runtime
        # identifier; do not "correct" it without checking the name the
        # project actually registers.
        type='InterleaveDateset',
    ),
    validation=dict(
        cfgs=[
            dict(
                filename='./reactiondata/real_test.jsonl',
                image_folder='./reaction_image',
                template_file='./config/_base_/dataset/template/reaction.json',
                type='FlickrDataset',
            ),
        ],
        type='ConcatDatasetWithShuffle',
    ),
)
# Model settings: checkpoint location, conversation template, multimodal
# options and the per-modality processing components.
model_args = dict(
    cache_dir=None,
    conv_args=dict(
        conv_template='vicuna_v1.1',
        tokenize_kwargs=dict(truncation_size=2048),
    ),
    # Nothing is frozen: both freeze flags are off.
    freeze_backbone=False,
    freeze_mm_mlp_adapter=False,
    gen_kwargs_set_bos_token_id=True,
    gen_kwargs_set_eos_token_id=True,
    gen_kwargs_set_pad_token_id=True,
    image_token_len=300,
    mm_use_im_start_end=True,
    mm_vision_select_layer=-2,
    model_max_length=2048,
    # Checkpoint path to load the model from.
    model_name_or_path='./exp/reaction_4.2.1',
    pretrain_mm_mlp_adapter=None,
    # Component `type` names for the conversation, image, target and text
    # processing steps.
    process_func_args=dict(
        conv=dict(type='ShikraConvProcess'),
        image=dict(type='ShikraImageProcessor'),
        target=dict(type='BoxFormatProcess'),
        text=dict(type='ShikraTextProcess'),
    ),
    sep_image_conv_front=False,
    target_processor=dict(
        boxes=dict(type='PlainBoxFormatter'),
    ),
    tune_mm_mlp_adapter=False,
    type='shikra',
    version='v1',
    vision_tower='SenseTime/deformable-detr',
)
# Training-run settings: this config trains only (do_train=True, with
# evaluation and prediction switched off).
training_args = dict(
    bf16=True,
    dataloader_num_workers=4,
    do_eval=False,
    do_predict=False,
    do_train=True,
    evaluation_strategy='no',
    # Sharding options.
    fsdp='full_shard auto_wrap',
    fsdp_transformer_layer_cls_to_wrap='LlamaDecoderLayer',
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    # Optimisation schedule.
    learning_rate=2e-05,
    logging_steps=10,
    lr_scheduler_type='cosine',
    num_train_epochs=50,
    # Output location for this run.
    output_dir='./exp/reaction_4.2.2-large',
    overwrite_output_dir=False,
    per_device_eval_batch_size=4,
    per_device_train_batch_size=4,
    predict_with_generate=True,
    remove_unused_columns=False,
    report_to='none',
    # Checkpointing: step-based, every 10000 steps, limit of 1.
    save_steps=10000,
    save_strategy='steps',
    save_total_limit=1,
    seed=42,
    tf32=True,
    warmup_ratio=0.03,
    weight_decay=0.05,
)