import transformers | |
from transformers import (Qwen2Config, Qwen2ForCausalLM, SiglipVisionConfig, | |
SiglipVisionModel, PretrainedConfig) | |
from transformers import AutoConfig | |
import copy | |
class FlashVLStaticConfig(PretrainedConfig):
    """Composite configuration holding a vision sub-config and an LLM sub-config.

    The vision part is stored as a ``SiglipVisionConfig`` and the language
    part as a ``Qwen2Config``; both are rebuilt from plain keyword mappings
    so the object can be reconstructed from the output of :meth:`to_dict`.

    Args:
        vision_config: Mapping of keyword arguments for ``SiglipVisionConfig``,
            an existing config object, or ``None`` to use
            ``{'model_type': 'siglip_vision_model'}``.
        llm_config: Mapping of keyword arguments for ``Qwen2Config``, an
            existing config object, or ``None`` to use
            ``{'architectures': ['Qwen2ForCausalLM']}``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = 'FlashVLStaticConfig'
    is_composition = True

    def __init__(self, vision_config=None, llm_config=None, **kwargs):
        super().__init__(**kwargs)
        # Defaults are created per call (inside the helper's caller) instead
        # of living in the signature, so no dict object is shared between
        # instances.
        self.vision_config = SiglipVisionConfig(
            **self._as_kwargs(vision_config,
                              {'model_type': 'siglip_vision_model'})
        )
        self.llm_config = Qwen2Config(
            **self._as_kwargs(llm_config,
                              {'architectures': ['Qwen2ForCausalLM']})
        )

    @staticmethod
    def _as_kwargs(config, default):
        """Normalize a sub-config argument into a keyword mapping.

        ``None`` selects ``default``; a ``PretrainedConfig`` instance is
        flattened with its own ``to_dict()``; anything else is returned
        unchanged and is expected to be a mapping.
        """
        if config is None:
            return default
        if isinstance(config, PretrainedConfig):
            return config.to_dict()
        return config

    def to_dict(self):
        """Serialize this config, including both sub-configs, to a dict.

        Returns:
            A deep copy of the instance attributes in which ``vision_config``
            and ``llm_config`` are replaced by their own ``to_dict()`` output
            and ``model_type`` is set from the class attribute.
        """
        output = copy.deepcopy(self.__dict__)
        output['vision_config'] = self.vision_config.to_dict()
        output['llm_config'] = self.llm_config.to_dict()
        # ``model_type`` is a class attribute and therefore not necessarily
        # present in ``__dict__``; write it explicitly so the serialized
        # form always identifies this config class.
        output['model_type'] = self.__class__.model_type
        return output