Spaces:

sapthesh
/

deepseekv3

Runtime error

sapthesh committed on Jan 3

Commit

ccfaaf5

verified ·

1 Parent(s): 0e79c88

Update custom_model.py

Files changed (1) hide show

custom_model.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import torch
 import torch.nn as nn
-from transformers import PreTrainedModel, AutoConfig
 class CustomModel(PreTrainedModel):
     config_class = AutoConfig  # Use AutoConfig to dynamically load the configuration class
@@ -8,8 +8,18 @@ class CustomModel(PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         # Implement your model architecture here
         self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
         try:
@@ -18,8 +28,7 @@ class CustomModel(PreTrainedModel):
             # Initialize the model with the configuration
             model = cls(config)
             # Load the model weights using the transformers library
-            state_dict = torch.load(f"{pretrained_model_name_or_path}/pytorch_model.bin", map_location="cpu")
-            model.load_state_dict(state_dict)
             return model
         except Exception as e:
             print(f"Failed to load model from {pretrained_model_name_or_path}. Error: {e}")

 import torch
 import torch.nn as nn
+from transformers import PreTrainedModel, AutoConfig, AutoModel
 class CustomModel(PreTrainedModel):
     config_class = AutoConfig  # Use AutoConfig to dynamically load the configuration class
     def __init__(self, config):
         super().__init__(config)
         # Implement your model architecture here
+        self.encoder = AutoModel.from_config(config)  # Load the base model
         self.classifier = nn.Linear(config.hidden_size, config.num_labels)
+    def forward(self, input_ids, attention_mask=None):
+        # Pass inputs through the encoder
+        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
+        # Get the pooled output (e.g., CLS token for classification tasks)
+        pooled_output = outputs.last_hidden_state[:, 0, :]
+        # Pass through the classifier
+        logits = self.classifier(pooled_output)
+        return logits
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
         try:
             # Initialize the model with the configuration
             model = cls(config)
             # Load the model weights using the transformers library
+            model.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
             return model
         except Exception as e:
             print(f"Failed to load model from {pretrained_model_name_or_path}. Error: {e}")