Update app.py
app.py CHANGED
@@ -9,7 +9,135 @@ import os
 import json
 import math
 
-
+class PositionalEncoding(nn.Module):
+    def __init__(self, d_model: int, max_seq_length: int = 512):
+        super().__init__()
+        position = torch.arange(max_seq_length).unsqueeze(1)
+        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
+        pe = torch.zeros(1, max_seq_length, d_model)
+        pe[0, :, 0::2] = torch.sin(position * div_term)
+        pe[0, :, 1::2] = torch.cos(position * div_term)
+        self.register_buffer('pe', pe)
+
+    def forward(self, x):
+        """x: [batch_size, seq_len, d_model]"""
+        return x + self.pe[:, :x.size(1), :]
+
+class DecoderBlock(nn.Module):
+    def __init__(self, d_model: int, n_heads: int, d_ff: int = 2048, dropout: float = 0.1):
+        super().__init__()
+        self.self_attention = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
+        self.norm1 = nn.LayerNorm(d_model)
+        self.ff = nn.Sequential(
+            nn.Linear(d_model, d_ff),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+            nn.Linear(d_ff, d_model)
+        )
+        self.norm2 = nn.LayerNorm(d_model)
+        self.dropout = nn.Dropout(dropout)
+
+    def forward(self, x, mask=None):
+        attn_output, _ = self.self_attention(x, x, x, attn_mask=mask)
+        x = self.norm1(x + self.dropout(attn_output))
+        ff_output = self.ff(x)
+        x = self.norm2(x + self.dropout(ff_output))
+        return x
+
+class TransformerDecoder(nn.Module):
+    def __init__(self,
+                 vocab_size: int,
+                 d_model: int = 1024,
+                 n_layers: int = 12,
+                 n_heads: int = 16,
+                 d_ff: int = 4096,
+                 max_seq_length: int = 256,
+                 dropout: float = 0.1):
+        super().__init__()
+
+        self.max_seq_length = max_seq_length
+        self.token_embedding = nn.Embedding(vocab_size, d_model)
+        self.positional_encoding = PositionalEncoding(d_model, max_seq_length)
+        self.dropout = nn.Dropout(dropout)
+
+        self.layers = nn.ModuleList([
+            DecoderBlock(d_model, n_heads, d_ff, dropout)
+            for _ in range(n_layers)
+        ])
+
+        self.final_layer = nn.Linear(d_model, vocab_size)
+        self._init_weights()
+
+    def _init_weights(self):
+        nn.init.normal_(self.token_embedding.weight, mean=0.0, std=0.01)
+
+        for layer in self.layers:
+            nn.init.normal_(layer.self_attention.in_proj_weight, mean=0.0, std=0.01)
+            nn.init.normal_(layer.self_attention.out_proj.weight, mean=0.0, std=0.01)
+
+            for name, param in layer.ff.named_parameters():
+                if 'weight' in name:
+                    nn.init.normal_(param, mean=0.0, std=0.01)
+                elif 'bias' in name:
+                    nn.init.zeros_(param)
+
+        nn.init.normal_(self.final_layer.weight, mean=0.0, std=0.01)
+        nn.init.zeros_(self.final_layer.bias)
+
+    def forward(self, x, mask=None):
+        # Create causal mask if not provided (True = position is not allowed to attend)
+        if mask is None:
+            seq_length = x.size(1)
+            mask = torch.triu(torch.ones(seq_length, seq_length), diagonal=1).bool()
+            mask = mask.to(x.device)
+
+        x = self.token_embedding(x)
+        # Stay batch-first throughout: PositionalEncoding adds self.pe of shape
+        # [1, max_seq_length, d_model], and nn.MultiheadAttention uses batch_first=True
+        x = self.positional_encoding(x)
+        x = self.dropout(x)
+
+        for layer in self.layers:
+            x = layer(x, mask=mask)
+
+        output = self.final_layer(x)
+        return output
+
+    @classmethod
+    def from_pretrained(cls, model_path: str, device: str = 'cpu'):
+        """Load a pretrained model from a directory"""
+        try:
+            # Load config
+            config_path = os.path.join(model_path, "config.json")
+            if not os.path.exists(config_path):
+                raise FileNotFoundError(f"Config not found at {config_path}")
+
+            with open(config_path) as f:
+                config = json.load(f)
+
+            # Create model instance
+            model = cls(
+                vocab_size=config['vocab_size'],
+                d_model=config['d_model'],
+                n_layers=config['n_layers'],
+                n_heads=config['n_heads'],
+                d_ff=config['d_ff'],
+                max_seq_length=config['max_seq_length'],
+                dropout=config.get('dropout', 0.1)
+            )
+
+            # Load weights
+            weights_path = os.path.join(model_path, "pytorch_model.bin")
+            if not os.path.exists(weights_path):
+                raise FileNotFoundError(f"Weights not found at {weights_path}")
+
+            state_dict = torch.load(weights_path, map_location=device)
+            model.load_state_dict(state_dict)
+
+            return model.to(device)
+
+        except Exception as e:
+            raise RuntimeError(f"Error loading model from {model_path}: {e}") from e
 
 def generate_text(prompt, max_length=100, temperature=0.7):
     try: