Upload 2 files
Browse files
- configuration_centurio.py +1 -1
- modeling_centurio.py +7 -6
configuration_centurio.py
CHANGED
@@ -37,7 +37,7 @@ class CenturioConfig(PretrainedConfig):
|
|
37 |
ignore_index=-100,
|
38 |
image_token_index=32000,
|
39 |
adapter_type="multiscale-pool",
|
40 |
-
adapter_config=
|
41 |
**kwargs,
|
42 |
):
|
43 |
self.ignore_index = ignore_index
|
|
|
37 |
ignore_index=-100,
|
38 |
image_token_index=32000,
|
39 |
adapter_type="multiscale-pool",
|
40 |
+
adapter_config=dict(),
|
41 |
**kwargs,
|
42 |
):
|
43 |
self.ignore_index = ignore_index
|
modeling_centurio.py
CHANGED
@@ -74,7 +74,7 @@ class LlavaMultiModalAdapter(nn.Module):
|
|
74 |
class WindowMLPProjector(nn.Module):
|
75 |
def __init__(self, config: LlavaConfig):
|
76 |
super().__init__()
|
77 |
-
self.multi_scale =
|
78 |
self.linear_1 = nn.Linear(config.image_hidden_size, config.text_config.hidden_size, bias=True)
|
79 |
self.act = ACT2FN["gelu"]
|
80 |
self.linear_2 = nn.Linear(config.text_config.hidden_size, config.text_config.hidden_size, bias=True)
|
@@ -93,7 +93,7 @@ class WindowMLPProjector(nn.Module):
|
|
93 |
class WindowPoolProjector(nn.Module):
|
94 |
def __init__(self, config: LlavaConfig):
|
95 |
super().__init__()
|
96 |
-
self.multi_scale =
|
97 |
self.pool = nn.AdaptiveAvgPool2d(getattr(config, "adapter_pool", 8))
|
98 |
self.linear_1 = nn.Linear(config.image_hidden_size, config.text_config.hidden_size, bias=True)
|
99 |
self.act = ACT2FN["gelu"]
|
@@ -119,7 +119,7 @@ class WindowPoolProjector(nn.Module):
|
|
119 |
class WindowShuffelProjector(nn.Module):
|
120 |
def __init__(self, config: LlavaConfig):
|
121 |
super().__init__()
|
122 |
-
self.multi_scale =
|
123 |
self.scale_factor = getattr(config, "adapter_pool", 2)
|
124 |
self.pixel_unshuffel = nn.PixelUnshuffle(self.scale_factor)
|
125 |
self.linear_1 = nn.Linear(config.image_hidden_size*(self.scale_factor**2), config.text_config.hidden_size, bias=True)
|
@@ -148,7 +148,7 @@ class MultiscalePoolProjector(nn.Module):
|
|
148 |
def __init__(self, config: LlavaConfig):
|
149 |
super().__init__()
|
150 |
|
151 |
-
self.multi_scale = getattr(config, "adapter_multi_scale", 2)
|
152 |
self.pool = nn.AvgPool2d(self.multi_scale)
|
153 |
self.linear_1 = nn.Linear(config.image_hidden_size*2, config.text_config.hidden_size, bias=True)
|
154 |
self.act = ACT2FN["gelu"]
|
@@ -181,7 +181,7 @@ class MultiscaleShuffleProjector(nn.Module):
|
|
181 |
def __init__(self, config):
|
182 |
super().__init__()
|
183 |
|
184 |
-
self.multi_scale =
|
185 |
self.shuffle = nn.PixelUnshuffle(self.multi_scale)
|
186 |
|
187 |
inc, ouc = config.image_hidden_size*(1+self.multi_scale**2), config.text_config.hidden_size
|
@@ -447,7 +447,8 @@ class CenturioForConditionalGeneration(LlavaPreTrainedModel):
|
|
447 |
self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
|
448 |
self.post_init()
|
449 |
|
450 |
-
|
|
|
451 |
|
452 |
def get_input_embeddings(self):
|
453 |
return self.language_model.get_input_embeddings()
|
|
|
74 |
class WindowMLPProjector(nn.Module):
|
75 |
def __init__(self, config: LlavaConfig):
|
76 |
super().__init__()
|
77 |
+
self.multi_scale = config.adapter_config.get("multi_scale", 2) #config.adapter_config.get("multi_scale")
|
78 |
self.linear_1 = nn.Linear(config.image_hidden_size, config.text_config.hidden_size, bias=True)
|
79 |
self.act = ACT2FN["gelu"]
|
80 |
self.linear_2 = nn.Linear(config.text_config.hidden_size, config.text_config.hidden_size, bias=True)
|
|
|
93 |
class WindowPoolProjector(nn.Module):
|
94 |
def __init__(self, config: LlavaConfig):
|
95 |
super().__init__()
|
96 |
+
self.multi_scale = config.adapter_config.get("multi_scale", 2) #config.adapter_config.get("multi_scale")
|
97 |
self.pool = nn.AdaptiveAvgPool2d(getattr(config, "adapter_pool", 8))
|
98 |
self.linear_1 = nn.Linear(config.image_hidden_size, config.text_config.hidden_size, bias=True)
|
99 |
self.act = ACT2FN["gelu"]
|
|
|
119 |
class WindowShuffelProjector(nn.Module):
|
120 |
def __init__(self, config: LlavaConfig):
|
121 |
super().__init__()
|
122 |
+
self.multi_scale = config.adapter_config.get("multi_scale", 2) #config.adapter_config.get("multi_scale")
|
123 |
self.scale_factor = getattr(config, "adapter_pool", 2)
|
124 |
self.pixel_unshuffel = nn.PixelUnshuffle(self.scale_factor)
|
125 |
self.linear_1 = nn.Linear(config.image_hidden_size*(self.scale_factor**2), config.text_config.hidden_size, bias=True)
|
|
|
148 |
def __init__(self, config: LlavaConfig):
|
149 |
super().__init__()
|
150 |
|
151 |
+
self.multi_scale = config.adapter_config.get("multi_scale", 2) #getattr(config.adapter_config, "adapter_multi_scale", 2)
|
152 |
self.pool = nn.AvgPool2d(self.multi_scale)
|
153 |
self.linear_1 = nn.Linear(config.image_hidden_size*2, config.text_config.hidden_size, bias=True)
|
154 |
self.act = ACT2FN["gelu"]
|
|
|
181 |
def __init__(self, config):
|
182 |
super().__init__()
|
183 |
|
184 |
+
self.multi_scale = config.adapter_config.get("multi_scale", 2) #config.adapter_config.get("multi_scale")
|
185 |
self.shuffle = nn.PixelUnshuffle(self.multi_scale)
|
186 |
|
187 |
inc, ouc = config.image_hidden_size*(1+self.multi_scale**2), config.text_config.hidden_size
|
|
|
447 |
self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
|
448 |
self.post_init()
|
449 |
|
450 |
+
def tie_weights(self):
|
451 |
+
return self.language_model.tie_weights()
|
452 |
|
453 |
def get_input_embeddings(self):
|
454 |
return self.language_model.get_input_embeddings()
|