from transformers import PretrainedConfig, LlamaConfig, SiglipVisionConfig


class LlamavisionConfig(PretrainedConfig):
    """Composite configuration holding a Llama text config and a SigLIP vision config.

    The two sub-configurations are read from the ``text_config`` and
    ``vision_config`` keyword arguments. Each one may be:

    * omitted or ``None`` -- the sub-config class is instantiated with its
      own defaults;
    * a mapping of keyword arguments -- expanded into the sub-config class;
    * an instance of the matching sub-config class -- stored unchanged.

    All remaining keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.
    """

    model_type = "llamavision"

    def __init__(self, **kwargs):
        """Build the text and vision sub-configs, then initialise the base class.

        Args:
            **kwargs: May contain ``text_config`` and ``vision_config`` (see the
                class docstring for accepted forms); both are removed before
                the rest is passed to ``PretrainedConfig.__init__``.

        Raises:
            TypeError: If a sub-config value is neither ``None``, a mapping,
                nor an instance of the matching sub-config class.
        """
        text_config = kwargs.pop("text_config", None)
        vision_config = kwargs.pop("vision_config", None)

        # Sub-configs are assigned before the base initialiser runs, matching
        # the original statement order.
        self.text_config = self._coerce_sub_config(LlamaConfig, text_config)
        self.vision_config = self._coerce_sub_config(
            SiglipVisionConfig, vision_config
        )
        super().__init__(**kwargs)

    @staticmethod
    def _coerce_sub_config(config_cls, value):
        """Return ``value`` as an instance of ``config_cls``.

        ``None`` yields a default-constructed ``config_cls``; an existing
        ``config_cls`` instance is returned as-is; anything else is expanded
        as keyword arguments (``config_cls(**value)``).
        """
        if value is None:
            # An explicit None is treated like a missing key instead of
            # failing on ``**None``.
            return config_cls()
        if isinstance(value, config_cls):
            # Already-built config objects are not mappings and cannot be
            # ``**``-expanded, so pass them through untouched.
            return value
        return config_cls(**value)