|
|
from dataclasses import dataclass, field |
|
|
from typing import Optional |
|
|
import transformers |
|
|
|
|
|
|
|
|
@dataclass
class DataArguments:
    """Data-loading and evaluation options.

    Every field carries a default, so ``DataArguments()`` can be built with
    no arguments and individual values overridden by keyword.  Field order
    is part of the positional ``__init__`` signature and must be preserved.
    """

    lazy_preprocess: bool = False
    only_two_class: bool = False
    old_two_class: bool = False
    is_multimodal: bool = False

    # Image / mask configuration.
    # NOTE(review): several defaults below point into one user's home
    # directory ('/home/emzhang/...'); override them per environment.
    image_folder: Optional[str] = field(default='/home/emzhang/data/segmentation/refer_seg/images/mscoco/images/train2014')
    mask_config: Optional[str] = field(default="./objectrelator/mask_config/maskformer2_swin_base_384_bs16_50ep.yaml")
    image_aspect_ratio: str = 'square'
    image_grid_pinpoints: Optional[str] = field(default=None)
    region_mask_type: Optional[str] = field(default=None)

    # Annotation file; the default is a RefCOCO validation json.
    json_path: str = '/home/emzhang/code/LLaVA/datasets/refcoco/refcoco_val.json'

    split_path: str = ''
    split: str = 'val'
    model_path: str = '/home/emzhang/code/llava_zem/checkpoints/SEG_class_refcoco_after_fixbug'
    model_map_name: str = 'ObjectRelator'
    SEG_norm: bool = field(default=False)
    SEG_proj: bool = field(default=True)
    criterion_type: Optional[str] = field(default="concat_seg")
    matcher_type: Optional[str] = field(default="wo_class")
    llm_pos: Optional[str] = field(default="none")
    ln_2048: bool = field(default=False)
    version_val: str = 'opt-iml-1.3b'
    seg_idx_back: bool = field(default=False)
    segmentation: bool = True
    eval_batch_size: int = 1
    dataloader_num_workers_val: int = 4
    thr: float = 0.5
    topk: int = 1
    fuse_score: bool = field(default=False)
    seg_task: Optional[str] = field(default="region")
    seg_last: bool = field(default=True)
    # presumably used to shard work into `num_chunks` pieces and select
    # piece `chunk_idx` -- TODO confirm against the consumer of these fields.
    num_chunks: int = 1
    chunk_idx: int = 0

    condition: str = 'multi-condition'

    # Placeholder ('/path/to/...') dataset locations; these must be replaced
    # with real paths before use.
    refcoco_image_folder: Optional[str] = "/path/to/refer_seg/images/mscoco/images/train2014"
    image_first: bool = field(default=True)
    instruction_version: str = 'v1'
    instance_json_path: str = '/path/to/instruction_segmentation_train.json'
    lvis_json_path: str = '/path/to/lvis_instance_train.json'
    lvis_categories_path: str = '/path/to/lvis_instance_categories.json'

    region_json_path: str = '/path/to/visual_prompt_segmentation_train.json'
    panoptic_json_path: str = "/path/to/coco"
    ref_coco_path: str = '/path/to/refcoco/refcoco_train.json'
    ref_coco_plus_path: str = '/path/to/refcoco+/refcoco+_train.json'
    ref_coco_g_path: str = '/path/to/refcocog/refcocog_train.json'
    mmconv_path: str = '/path/to/llava_1_5'
    # '||'-separated string; presumably one ratio per dataset -- TODO confirm.
    data_ratio: str = '1||1||1||1'
    fix_dataset_len: int = 0

    # Joint ego<->exo annotation files (placeholders).
    joint_json_ego2exo: str = '/path/to/joint_ego_exo.json'
    joint_json_exo2ego: str = '/path/to/joint_exo_ego.json'
|
|
|
|
|
|
|
|
@dataclass
class ModelArguments:
    """Model-construction options.

    All fields have defaults, so ``ModelArguments()`` is valid on its own.
    Field order is part of the positional ``__init__`` signature and must
    be preserved.
    """

    # Base model identifier and version tag.
    model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
    version: Optional[str] = field(default="v0")

    # Flags that presumably control which parts are trained -- TODO confirm
    # against the training script that reads them.
    freeze_backbone: bool = field(default=False)
    train_backbone: bool = field(default=False)
    tune_mm_mlp_adapter: bool = field(default=False)

    # Vision tower / multimodal ("mm_") settings.
    vision_tower: Optional[str] = field(default=None)
    mm_vision_select_layer: Optional[int] = field(default=-1)
    pretrain_mm_mlp_adapter: Optional[str] = field(default=None)
    mm_use_im_start_end: bool = field(default=False)
    mm_use_im_patch_token: bool = field(default=True)
    mm_vision_select_feature: Optional[str] = field(default="patch")

    with_norm: bool = field(default=True)
    with_layernorm: bool = field(default=False)
    skip_init_vision: bool = field(default=False)
    with_sam: bool = field(default=False)
    with_swin: bool = field(default=False)
    with_teacher: bool = field(default=False)
    swin_type: Optional[str] = field(default="base")
    projector_outdim: Optional[int] = field(default=2048)
    mm_projector_type: Optional[str] = field(default="swin_conv")
    model_version: Optional[str] = field(default="v1")
    load_mask2former: bool = field(default=True)
    dino_path: Optional[str] = field(default=None)
|
|
|
|
|
|
|
|
@dataclass
class TrainingArguments(transformers.TrainingArguments):
    """Training options layered on top of ``transformers.TrainingArguments``.

    Adds project-specific fields (quantization, LoRA, stage flags, paths).
    NOTE(review): some names declared here (e.g. ``optim``,
    ``remove_unused_columns``, ``dataloader_drop_last``, ``output_dir``)
    presumably redeclare fields of the parent class to change their
    defaults -- confirm against the installed transformers version.
    """

    cache_dir: Optional[str] = field(default=None)
    optim: str = field(default="adamw_torch")
    remove_unused_columns: bool = field(default=False)
    freeze_mm_mlp_adapter: bool = field(default=False)
    mpt_attn_impl: Optional[str] = field(default="triton")
    model_max_length: int = field(
        default=512,
        metadata={
            "help":
            "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )

    # Quantization options (see each field's help text).
    double_quant: bool = field(
        default=True,
        metadata={"help": "Compress the quantization statistics through double quantization."}
    )
    quant_type: str = field(
        default="nf4",
        metadata={"help": "Quantization data type to use. Should be one of `fp4` or `nf4`."}
    )
    bits: int = field(
        default=16,
        metadata={"help": "How many bits to use."}
    )

    # LoRA options.
    lora_enable: bool = False
    lora_r: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    lora_weight_path: str = ""
    lora_bias: str = "none"

    dataloader_drop_last: bool = True

    # Project-specific mode flags; their exact effect is defined by the
    # code that consumes them, not here.
    is_handal: bool = False

    joint_training: bool = False

    first_stage: bool = False

    # Placeholder paths ('/path/to/...'); replace before running.
    pretrained_model_path: str = "/path/to/pretrained_model"
    output_dir: str = "/path/to/output_dir"