suayptalha committed
Commit d1adb77 · verified · 1 Parent(s): bc73b41

Update modeling_minGRULM.py

Files changed (1)
  1. modeling_minGRULM.py +27 -2
modeling_minGRULM.py CHANGED

@@ -7,7 +7,6 @@ from typing import Optional
 from .configuration_minGRULM import MinGRULMConfig
 from minGRU_pytorch.minGRULM import minGRULM
 
-
 # Wrapper class for device compatibility
 class MinGRULMWrapped(nn.Module):
     def __init__(self, min_gru_model):
@@ -62,6 +61,9 @@ class MinGRULMForCausalLM(MinGRULMPreTrainedModel):
         # Language modeling head
         self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)
 
+        # Initialize weights (if required for missing layers)
+        self.initialize_layers()
+
         self.post_init()
 
     def get_input_embeddings(self):
@@ -103,4 +105,27 @@ class MinGRULMForCausalLM(MinGRULMPreTrainedModel):
         return CausalLMOutputWithPast(
             loss=loss,
             logits=logits,
-        )
+        )
+
+    def initialize_layers(self):
+        """
+        Initialize missing layers in the model, such as custom layers or parts of the minGRULM.
+        If layers are already initialized, we can skip them.
+        """
+        # Example: Initialize layers manually if needed
+        for name, module in self.model.min_gru_model.named_children():
+            if isinstance(module, nn.Module):
+                if 'token_emb' in name:
+                    # Token embeddings, if needed, you can initialize with a custom scheme
+                    nn.init.xavier_uniform_(module.weight)
+                elif isinstance(module, nn.Linear):
+                    # Initialize Linear layers if not initialized already
+                    if module.weight is not None:
+                        nn.init.xavier_uniform_(module.weight)
+                    if module.bias is not None:
+                        nn.init.zeros_(module.bias)
+                # Initialize other layers similarly, depending on the type
+                elif isinstance(module, nn.LayerNorm):
+                    # Initialize LayerNorm layers
+                    nn.init.constant_(module.weight, 1.0)
+                    nn.init.constant_(module.bias, 0)
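
For readers who want to try the change, a hypothetical usage sketch follows; it is not part of the commit. It assumes the two files are importable as a plain package and that MinGRULMConfig accepts vocab_size and d_model, the only config fields the diff itself references; any other required arguments are omitted. Constructing the model runs initialize_layers() inside __init__, just before post_init():

# Hypothetical usage sketch, not from the repository.
# Assumes configuration_minGRULM.py and modeling_minGRULM.py are importable
# as a package; the vocab_size/d_model values below are illustrative only.
from configuration_minGRULM import MinGRULMConfig
from modeling_minGRULM import MinGRULMForCausalLM

config = MinGRULMConfig(vocab_size=50257, d_model=512)
model = MinGRULMForCausalLM(config)   # initialize_layers() runs in __init__

# The helper can also be re-applied manually, e.g. after swapping out a
# direct child module of model.model.min_gru_model:
model.initialize_layers()

One design note: named_children() is shallow, so only the immediate submodules of min_gru_model (such as the token embedding) are re-initialized; nn.Linear or nn.LayerNorm layers nested inside deeper blocks keep their default PyTorch initialization. A variant that should reach them would iterate named_modules() instead.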