update
README.md
CHANGED
```diff
@@ -44,7 +44,6 @@ We provide the following models:
 | Eagle2-1B | [Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | Siglip | 16K| [🤗 link](https://huggingface.co/NVIDIA/Eagle2-1B)|
 | Eagle2-2B | [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | Siglip | 16K| [🤗 link](https://huggingface.co/NVIDIA/Eagle2-2B)|
 | Eagle2-9B | [Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) | Siglip+ConvNext | 16K| [🤗 link](https://huggingface.co/NVIDIA/Eagle2-9B)|
-| Eagle2-32B | [Qwen2.5-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct) | Siglip+ConvNext | 16K| [🤗 link](https://huggingface.co/NVIDIA/Eagle2-32B)|
 
 ## Benchmark Results
 | Benchmark | MiniCPM-Llama3-V-2_5 | InternVL-Chat-V1-5 | InternVL2-8B |QwenVL2-7B| Eagle2-9B|
```
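For reference, a minimal loading sketch for any checkpoint in the table above. The repo IDs come from the 🤗 links; `trust_remote_code=True` is needed because these repos ship the custom modeling files changed in this commit. This is a sketch only; see each model card for the exact processor and chat API.

```python
from transformers import AutoModel

# Any repo ID from the table above works the same way.
model = AutoModel.from_pretrained(
    "NVIDIA/Eagle2-1B",
    trust_remote_code=True,  # loads the custom Eagle2 modeling code from the repo
)
```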
configuration_eagle_chat.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 # --------------------------------------------------------
 # Eagle2
 # Copyright (c) 2025 NVIDIA
-# Licensed under The
+# Licensed under The Apache License [see LICENSE for details]
 # --------------------------------------------------------
 
 import copy
@@ -36,6 +36,7 @@ class Eagle2ChatConfig(PretrainedConfig):
         mlp_checkpoint=True,
         pre_feature_reduction=False,
         keep_aspect_ratio=False,
+        vocab_size=-1,
         **kwargs):
         super().__init__(**kwargs)
 
@@ -73,6 +74,7 @@ class Eagle2ChatConfig(PretrainedConfig):
         self.mlp_checkpoint = mlp_checkpoint
         self.pre_feature_reduction = pre_feature_reduction
         self.keep_aspect_ratio = keep_aspect_ratio
+        self.vocab_size = self.llm_config.vocab_size
         logger.info(f'keep_aspect_ratio: {self.keep_aspect_ratio}')
         logger.info(f'vision_select_layer: {self.select_layer}')
         logger.info(f'min_dynamic_patch: {self.min_dynamic_patch}')
```
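The second and third hunks work together: `vocab_size=-1` is accepted as a keyword argument, but the stored attribute is mirrored from the wrapped LLM config, so the composite config cannot disagree with the language model. A minimal sketch of what that buys a caller, assuming the hub checkpoint resolves to this config class via `trust_remote_code`:

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("NVIDIA/Eagle2-1B", trust_remote_code=True)

# After this change the LLM vocabulary size is exposed at the top level,
# and it is guaranteed to match the nested LLM config.
assert cfg.vocab_size == cfg.llm_config.vocab_size
```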
configuration_multi_backbone_channel_concatentation_model.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 # --------------------------------------------------------
 # Eagle2
 # Copyright (c) 2025 NVIDIA
-# Licensed under The
+# Licensed under The Apache License [see LICENSE for details]
 # --------------------------------------------------------
 
 import os
```
modeling_eagle_chat.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 # --------------------------------------------------------
 # Eagle2
 # Copyright (c) 2025 NVIDIA
-# Licensed under The
+# Licensed under The Apache License [see LICENSE for details]
 # --------------------------------------------------------
 
 import warnings
```
modeling_siglip.py
CHANGED
```diff
@@ -1,3 +1,11 @@
+# --------------------------------------------------------
+# Eagle2
+# Copyright (c) 2025 NVIDIA
+# Licensed under The Apache License [see LICENSE for details]
+# Support flash-attention in SigLIP
+# --------------------------------------------------------
+
+
 # coding=utf-8
 # Copyright 2024 Google AI and The HuggingFace Team. All rights reserved.
 #
@@ -374,6 +382,10 @@ class SiglipAttention(nn.Module):
         """Input shape: Batch x Time x Channel"""
         if self.use_flash_attn:
             return self._flash_attn(hidden_states)
+        else:
+            return self._vanilla_attn(hidden_states, attention_mask, output_attentions)
+
+    def _vanilla_attn(self, hidden_states, attention_mask=None, output_attentions=False):
         batch_size, q_len, _ = hidden_states.size()
 
         query_states = self.q_proj(hidden_states)
```
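The second hunk makes the non-flash path explicit: instead of falling through to inline attention code, the forward now dispatches to `_vanilla_attn`, mirroring the existing `_flash_attn` branch. The body of `_vanilla_attn` is not shown in this hunk; as a sketch, a SigLIP-style eager attention (attribute names assumed from the upstream Hugging Face `SiglipAttention`, not taken from this commit) looks like:

```python
import torch

def _vanilla_attn(self, hidden_states, attention_mask=None, output_attentions=False):
    # Standard eager multi-head self-attention over (batch, seq, embed_dim).
    bsz, q_len, _ = hidden_states.size()
    shape = (bsz, q_len, self.num_heads, self.head_dim)
    q = self.q_proj(hidden_states).view(*shape).transpose(1, 2)
    k = self.k_proj(hidden_states).view(*shape).transpose(1, 2)
    v = self.v_proj(hidden_states).view(*shape).transpose(1, 2)

    attn = torch.matmul(q, k.transpose(-1, -2)) * self.scale  # (B, H, L, L)
    if attention_mask is not None:
        attn = attn + attention_mask
    attn = torch.softmax(attn, dim=-1, dtype=torch.float32).to(q.dtype)

    out = torch.matmul(attn, v).transpose(1, 2).reshape(bsz, q_len, -1)
    out = self.out_proj(out)
    return out, (attn if output_attentions else None)
```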
multi_backbone_channel_concatenation_encoder.py
CHANGED
```diff
@@ -1,13 +1,15 @@
+# --------------------------------------------------------
+# Eagle2
+# Copyright (c) 2025 NVIDIA
+# Licensed under The Apache License [see LICENSE for details]
+# --------------------------------------------------------
+
 import torch, os
 import torch.nn as nn
 from torch.utils.checkpoint import checkpoint
 
 from .siglip_vision_tower import SiglipVisionTower
 
-# from .hr_clip_encoder import HRCLIPVisionTower
-# from .eva_vit import EVAVITVisionTower
-# from .SAM.modeling_sam import SAMVisionTower
-# from .pix2struct_large import Pix2StructLargeVisionTower
 import torch.nn.functional as F
 from torch.nn.init import trunc_normal_
 from copy import deepcopy
```
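Beyond the license header, this hunk drops dead commented-out imports for backbones the encoder no longer references. The encoder itself is what backs the "Siglip+ConvNext" entries in the README table: multiple vision backbones run in parallel and their feature maps are fused by concatenation along the channel dimension. A minimal sketch of that idea; the shapes and the bilinear resize are illustrative assumptions, not this file's exact code:

```python
import torch
import torch.nn.functional as F

def concat_backbone_channels(siglip_feat, convnext_feat):
    # siglip_feat: (B, C1, H, W); convnext_feat: (B, C2, H', W').
    # Resize so the spatial grids match, then stack along channels.
    if convnext_feat.shape[-2:] != siglip_feat.shape[-2:]:
        convnext_feat = F.interpolate(
            convnext_feat, size=siglip_feat.shape[-2:],
            mode="bilinear", align_corners=False,
        )
    return torch.cat([siglip_feat, convnext_feat], dim=1)  # (B, C1 + C2, H, W)
```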
multi_backbone_channel_concatentation_model.py
CHANGED
```diff
@@ -1,3 +1,9 @@
+# --------------------------------------------------------
+# Eagle2
+# Copyright (c) 2025 NVIDIA
+# Licensed under The Apache License [see LICENSE for details]
+# --------------------------------------------------------
+
 import torch.nn as nn
 
 from transformers.modeling_outputs import BaseModelOutputWithPooling
```