from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging


logger = logging.get_logger(__name__)


DEEPSEEK_OCR_PRETRAINED_CONFIG_ARCHIVE_MAP = {}


class DeepseekOCRConfig(PretrainedConfig):
    """
    Configuration class for the Deepseek-OCR model.

    Allows Hugging Face Auto classes to recognize this custom architecture.

    Example:

    ```python
    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("specsGuy/Deepseek-ocr", trust_remote_code=True)
    print(config.model_type)  # deepseekocr
    ```
    """

    model_type = "deepseekocr"

    def __init__(
        self,
        vocab_size=129280,
        hidden_size=1280,
        intermediate_size=6848,
        num_hidden_layers=12,
        num_attention_heads=10,
        num_key_value_heads=10,
        max_position_embeddings=8192,
        moe_intermediate_size=896,
        n_group=1,
        n_routed_experts=64,
        n_shared_experts=2,
        num_experts_per_tok=6,
        first_k_dense_replace=1,
        topk_group=1,
        topk_method="greedy",
        use_mla=False,
        bos_token_id=0,
        eos_token_id=1,
        torch_dtype="bfloat16",
        projector_config=None,
        vision_config=None,
        language_config=None,
        **kwargs,
    ):
        # Core transformer dimensions.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.max_position_embeddings = max_position_embeddings
        self.moe_intermediate_size = moe_intermediate_size

        # Mixture-of-Experts routing settings.
        self.n_group = n_group
        self.n_routed_experts = n_routed_experts
        self.n_shared_experts = n_shared_experts
        self.num_experts_per_tok = num_experts_per_tok
        self.first_k_dense_replace = first_k_dense_replace
        self.topk_group = topk_group
        self.topk_method = topk_method
        self.use_mla = use_mla

        # Special token ids and default dtype.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.torch_dtype = torch_dtype

        # Nested sub-configurations for the vision encoder, projector, and language model.
        self.projector_config = projector_config
        self.vision_config = vision_config
        self.language_config = language_config

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            # Forward torch_dtype so the base class stores it consistently; it may
            # otherwise be reset when PretrainedConfig processes its own kwargs.
            torch_dtype=torch_dtype,
            **kwargs,
        )

        logger.info("✅ Initialized DeepseekOCRConfig successfully.")
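
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the published checkpoint): a minimal
# local round-trip check of the config. The AutoConfig.register call and the
# temporary directory below are assumptions for local testing, not something
# the model repo requires.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import tempfile

    from transformers import AutoConfig

    # Small override purely for the demo; all other fields keep their defaults.
    config = DeepseekOCRConfig(num_hidden_layers=2)

    # Register the custom model_type so AutoConfig can resolve it locally
    # without trust_remote_code (hypothetical local-testing setup).
    AutoConfig.register("deepseekocr", DeepseekOCRConfig)

    with tempfile.TemporaryDirectory() as tmp_dir:
        config.save_pretrained(tmp_dir)  # writes config.json with model_type "deepseekocr"
        reloaded = AutoConfig.from_pretrained(tmp_dir)

    assert reloaded.model_type == "deepseekocr"
    assert reloaded.num_hidden_layers == 2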