import copy

import transformers
from transformers import (
    Qwen2Config,
    Qwen2ForCausalLM,
    SiglipVisionConfig,
    SiglipVisionModel,
    PretrainedConfig,
)
from transformers import AutoConfig


class FlashVLStaticConfig(PretrainedConfig):
    """Composite configuration holding a SigLIP vision config and a Qwen2 LLM config.

    The two sub-configurations are stored as ``vision_config``
    (``SiglipVisionConfig``) and ``llm_config`` (``Qwen2Config``).
    """

    model_type = 'FlashVLStaticConfig'
    is_composition = True

    def __init__(self, vision_config=None, llm_config=None, **kwargs):
        """Build the composite configuration.

        Args:
            vision_config: Keyword arguments for ``SiglipVisionConfig`` given
                as a dict, an existing ``SiglipVisionConfig`` instance, or
                ``None`` to use ``{'model_type': 'siglip_vision_model'}``.
            llm_config: Keyword arguments for ``Qwen2Config`` given as a
                dict, an existing ``Qwen2Config`` instance, or ``None`` to
                use ``{'architectures': ['Qwen2ForCausalLM']}``.
            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)

        # Defaults are created per call rather than in the signature so that
        # no mutable dict object is shared between instances.
        if vision_config is None:
            vision_config = {'model_type': 'siglip_vision_model'}
        if llm_config is None:
            llm_config = {'architectures': ['Qwen2ForCausalLM']}

        # Accept ready-made config objects as well as plain dicts.
        if isinstance(vision_config, SiglipVisionConfig):
            self.vision_config = vision_config
        else:
            self.vision_config = SiglipVisionConfig(**vision_config)

        if isinstance(llm_config, Qwen2Config):
            self.llm_config = llm_config
        else:
            self.llm_config = Qwen2Config(**llm_config)

    def to_dict(self):
        """Return the configuration as a dict with nested configs expanded.

        Returns:
            A deep copy of the instance attributes in which ``vision_config``
            and ``llm_config`` are replaced by the result of their own
            ``to_dict()`` and ``model_type`` is set from the class attribute.
        """
        output = copy.deepcopy(self.__dict__)
        output['vision_config'] = self.vision_config.to_dict()
        output['llm_config'] = self.llm_config.to_dict()
        # model_type is a class attribute, so it is absent from __dict__ and
        # has to be added explicitly for the serialized form to carry it.
        output['model_type'] = self.__class__.model_type
        return output