sanjanatule committed on
Commit d3369cb
1 Parent(s): 5d0f08d

Upload utils.py

Files changed (1)
utils.py +22 -12
utils.py CHANGED
@@ -1,24 +1,34 @@
+
 import torch
 from torch import nn
 import lightning.pytorch as pl
 from torch.nn import functional as F
 
+# encoding
 chars = ['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
-
 vocab_size = len(chars)
+stoi = { ch:i for i,ch in enumerate(chars) }
+itos = { i:ch for i,ch in enumerate(chars) }
+
+# encode / decode function
+encode = lambda s: [stoi[c] for c in s] # encoder: take a string, output a list of integers
+decode = lambda l: ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+# model config
 block_size = 32
-n_embd = 128
-n_head = 4
-n_layer = 8
-dropout = 0.1
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
+n_embd = 128
+n_head = 4
+n_layer = 8
+dropout = 0.1
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+learning_rate = 1e-3
 
 class Head(nn.Module):
     """ one head of self-attention """
 
     def __init__(self, head_size):
         super().__init__()
-        self.key = nn.Linear(n_embd, head_size, bias=False)
+        self.key = nn.Linear(n_embd, head_size, bias=False)
         self.query = nn.Linear(n_embd, head_size, bias=False)
         self.value = nn.Linear(n_embd, head_size, bias=False)
         self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
@@ -44,8 +54,8 @@ class MultiHeadAttention(nn.Module):
 
     def __init__(self, num_heads, head_size):
         super().__init__()
-        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
-        self.proj = nn.Linear(n_embd, n_embd)
+        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+        self.proj = nn.Linear(n_embd, n_embd)
         self.dropout = nn.Dropout(dropout)
 
     def forward(self, x):
@@ -75,10 +85,10 @@ class Block(nn.Module):
         # n_embd: embedding dimension, n_head: the number of heads we'd like
         super().__init__()
         head_size = n_embd // n_head
-        self.sa = MultiHeadAttention(n_head, head_size)
+        self.sa = MultiHeadAttention(n_head, head_size)
         self.ffwd = FeedFoward(n_embd)
-        self.ln1 = nn.LayerNorm(n_embd)
-        self.ln2 = nn.LayerNorm(n_embd)
+        self.ln1 = nn.LayerNorm(n_embd)
+        self.ln2 = nn.LayerNorm(n_embd)
 
     def forward(self, x):
         x = x + self.sa(self.ln1(x))
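
Note on the new encoding helpers: the added stoi/itos tables and the encode/decode lambdas form a simple character-level codec over the 65-symbol chars vocabulary. A minimal round-trip sketch (standalone; the chars list is abbreviated here with ranges equivalent to the full list in the diff, and the sample string is illustrative only):

# Character-level codec as introduced in utils.py (chars abbreviated, same 65 symbols).
chars = ['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?'] \
        + [chr(c) for c in range(ord('A'), ord('Z') + 1)] \
        + [chr(c) for c in range(ord('a'), ord('z') + 1)]
vocab_size = len(chars)                             # 65 symbols
stoi = { ch:i for i,ch in enumerate(chars) }        # char -> integer id
itos = { i:ch for i,ch in enumerate(chars) }        # integer id -> char

encode = lambda s: [stoi[c] for c in s]             # string -> list of ids
decode = lambda l: ''.join([itos[i] for i in l])    # list of ids -> string

sample = "Hello, world!"                            # illustrative input only
ids = encode(sample)
assert decode(ids) == sample                        # the codec round-trips losslessly
print(ids[:5])                                      # [20, 43, 50, 50, 53]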
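
Note on the attention code: the hunks above only touch Head.__init__, MultiHeadAttention.__init__, and Block.__init__; the forward passes are outside the diff. For context, here is a sketch of how a head of this shape typically computes causally masked scaled dot-product attention, reusing the layers and tril buffer registered in __init__ and the re-added config values. The forward body and the dropout member are assumptions, not part of this commit.

import torch
from torch import nn
from torch.nn import functional as F

n_embd, block_size, dropout = 128, 32, 0.1          # config values re-added by this commit

class Head(nn.Module):
    """ one head of self-attention (init mirrors the diff; forward is a sketch) """

    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
        self.dropout = nn.Dropout(dropout)          # assumed; not visible in the hunk

    def forward(self, x):
        # x: (B, T, n_embd) with T <= block_size
        B, T, C = x.shape
        k = self.key(x)                             # (B, T, head_size)
        q = self.query(x)                           # (B, T, head_size)
        wei = q @ k.transpose(-2, -1) * k.shape[-1] ** -0.5           # scaled attention scores (B, T, T)
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))  # causal mask via the tril buffer
        wei = F.softmax(wei, dim=-1)
        wei = self.dropout(wei)
        v = self.value(x)                           # (B, T, head_size)
        return wei @ v                              # (B, T, head_size)

out = Head(head_size=n_embd // 4)(torch.randn(2, block_size, n_embd))
print(out.shape)                                    # torch.Size([2, 32, 32])

MultiHeadAttention presumably concatenates num_heads such outputs along the channel dimension and maps them back through self.proj (n_embd -> n_embd, as registered in the diff), and Block applies pre-LayerNorm residual connections, as shown by the context line x = x + self.sa(self.ln1(x)).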