# NOTE(review): the three lines below were extraction/viewer residue (file size,
# commit hash, line-number gutter), not Python — commented out to keep the file valid.
# File size: 1,679 Bytes | commit 8a096e8
from transformers import PretrainedConfig
from typing import List
from transformers import Qwen2Config, CLIPVisionConfig
class InfMLLMUnifiedHDChatConfig(PretrainedConfig):
    """Configuration for the InfMLLM unified HD chat multimodal model.

    Bundles a CLIP vision-encoder config and a Qwen2 language-model config
    together with LoRA / image / video / conversation settings, following the
    Hugging Face ``PretrainedConfig`` convention (unknown kwargs are forwarded
    to ``super().__init__``).

    Args:
        vison_config: Vision-encoder config, either a ``CLIPVisionConfig`` or a
            plain ``dict`` (as produced when a saved config is reloaded).
            NOTE(review): the 'vison' misspelling is kept deliberately — it is
            part of the serialized config schema, so renaming it would break
            loading of existing checkpoints.
        lm_config: Language-model config, a ``Qwen2Config`` or a ``dict``.
        lm_model: Path or identifier of the language model weights.
        lm_tokenizer: Path or identifier of the tokenizer.
        lora_modules: Comma-separated module names targeted by LoRA
            (presumably — verify against the model code that consumes it).
        lora_llm: Whether to apply LoRA to the language model.
        lora_r: LoRA rank.
        lora_alpha: LoRA scaling alpha.
        lora_dropout: LoRA dropout probability.
        encoder_img: Path or identifier of the image encoder.
        image_size_img: Input image size in pixels (336 matches CLIP-L/14-336).
        lora_encoder_img: Whether to apply LoRA to the image encoder.
        hd_num: Number of high-definition image tiles/crops.
        encoder_video: Path or identifier of the video encoder.
        max_txt_len: Maximum text length in tokens.
        conv_style: Conversation/prompt template name.
        precision: Compute precision string (e.g. ``"bf16"``).
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    def __init__(
        self,
        vison_config=None,
        lm_config=None,
        lm_model="",
        lm_tokenizer="",
        lora_modules="",
        lora_llm=False,
        lora_r=128,
        lora_alpha=256,
        lora_dropout=0,
        #
        encoder_img="",
        image_size_img=336,
        lora_encoder_img=False,
        hd_num=9,
        #
        encoder_video="",
        #
        max_txt_len=4096,
        conv_style='qwen-7b-chat',
        precision="bf16",
        **kwargs
    ):
        self.lm_model = lm_model
        self.lm_tokenizer = lm_tokenizer
        self.lora_modules = lora_modules
        self.lora_llm = lora_llm
        self.lora_r = lora_r
        self.lora_alpha = lora_alpha
        self.lora_dropout = lora_dropout
        self.encoder_img = encoder_img
        self.image_size_img = image_size_img
        self.lora_encoder_img = lora_encoder_img
        self.hd_num = hd_num
        self.encoder_video = encoder_video
        self.max_txt_len = max_txt_len
        self.conv_style = conv_style
        self.precision = precision
        # A reloaded config arrives as a plain dict; rehydrate it into the
        # proper config class. isinstance (not type ==) also accepts subclasses.
        if isinstance(vison_config, dict):
            self.vision_config = CLIPVisionConfig(**vison_config)
        else:
            # None or an already-constructed config object is stored as-is.
            self.vision_config = vison_config
        if isinstance(lm_config, dict):
            self.lm_config = Qwen2Config(**lm_config)
        else:
            self.lm_config = lm_config
        super().__init__(**kwargs)
|