{
  "architectures": [
    "Oculus"
  ],
  "model_type": "oculus",
  "torch_dtype": "float16",
  "transformers_version": "4.40.0",
  "dinov3_config": {
    "hidden_size": 1024,
    "num_hidden_layers": 24,
    "num_attention_heads": 16,
    "num_patches": 196,
    "image_size": 224
  },
  "siglip2_config": {
    "hidden_size": 1152,
    "num_hidden_layers": 27,
    "num_attention_heads": 16,
    "num_patches": 576,
    "image_size": 384
  },
  "fusion_config": {
    "method": "concatenation",
    "fused_dim": 2176,
    "output_resolution": "14x14"
  },
  "projector_config": {
    "vision_dim": 2176,
    "hidden_dim": 4352,
    "lm_dim": 1536
  },
  "language_model_config": {
    "vocab_size": 131072,
    "hidden_size": 1536,
    "num_hidden_layers": 16,
    "num_attention_heads": 24,
    "max_position_embeddings": 32768
  },
  "task_heads_config": {
    "segmentation_num_classes": 150,
    "classification_num_classes": 1000,
    "detection_num_classes": 80,
    "ocr_max_seq_len": 100
  }
}