nickgardner committed on
Commit
238ab50
1 Parent(s): fc75f91

full func test 4

Files changed (3)
  1. app.py +49 -3
  2. requirements.txt +5 -0
  3. transformer.py +220 -0
app.py CHANGED
@@ -1,7 +1,53 @@
 import gradio as gr
-
-def greet(name):
-    return "Hello " + name + "!!"
-
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+import torch
+from torchtext.data.utils import get_tokenizer
+import numpy as np
+
+from huggingface_hub import hf_hub_download
+from transformer import Transformer
+
+MAX_LEN = 350
+
+tokenizer = get_tokenizer('spacy', language='en_core_web_sm')
+# hf_hub_download expects a bare repo id, not a full URL
+vocab = torch.load(hf_hub_download(repo_id="nickgardner/chatbot",
+                                   filename="vocab.pth"))
+vocab_token_dict = vocab.get_stoi()
+pad_token = vocab_token_dict['<pad>']
+unknown_token = vocab_token_dict['<unk>']
+sos_token = vocab_token_dict['<sos>']
+eos_token = vocab_token_dict['<eos>']
+text_pipeline = lambda x: vocab(tokenizer(x))
+
+d_model = 512
+heads = 8
+N = 6
+src_vocab = len(vocab)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = Transformer(len(vocab), len(vocab), d_model, N, heads).to(device)
+# hf_hub_download returns a local file path; the checkpoint must be
+# deserialized with torch.load before it can be passed to load_state_dict
+model.load_state_dict(torch.load(
+    hf_hub_download(repo_id="nickgardner/chatbot",
+                    filename="alpaca_train_380_epoch.pt"),
+    map_location=device))
+model.eval()
+
+@torch.no_grad()  # inference only; model.eval() was already set above
+def respond(custom_string):
+    src = torch.tensor(text_pipeline(custom_string), dtype=torch.int64).unsqueeze(0).to(device)
+    src_mask = ((src != pad_token) & (src != unknown_token)).unsqueeze(-2).to(device)
+    e_outputs = model.encoder(src, src_mask)
+
+    outputs = torch.zeros(MAX_LEN).type_as(src.data).to(device)
+    outputs[0] = sos_token
+    for i in range(1, MAX_LEN):
+        # "no-peek" mask: position i may attend only to positions <= i
+        trg_mask = np.triu(np.ones([1, i, i]), k=1).astype('uint8')
+        trg_mask = (torch.from_numpy(trg_mask) == 0).to(device)
+
+        out = model.out(model.decoder(outputs[:i].unsqueeze(0), e_outputs, src_mask, trg_mask))
+        out = torch.nn.functional.softmax(out, dim=-1)
+        # greedy decoding: keep only the single most likely next token
+        val, ix = out[:, -1].data.topk(1)
+
+        outputs[i] = ix[0][0]
+        if ix[0][0] == eos_token:
+            break
+    return ' '.join([vocab.get_itos()[ix] for ix in outputs[1:i]])
+
+iface = gr.Interface(fn=respond, inputs="text", outputs="text")
 iface.launch()
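
A note on the loop in respond above: the np.triu / == 0 pair builds the standard "no-peek" (causal) mask, so each target position can only attend to positions at or before it. A minimal standalone sketch of what it produces (the value of i is illustrative):

import numpy as np
import torch

i = 4  # pretend four target tokens exist so far
trg_mask = np.triu(np.ones([1, i, i]), k=1).astype('uint8')
trg_mask = torch.from_numpy(trg_mask) == 0
print(trg_mask.int())
# tensor([[[1, 0, 0, 0],
#          [1, 1, 0, 0],
#          [1, 1, 1, 0],
#          [1, 1, 1, 1]]], dtype=torch.int32)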
requirements.txt ADDED
@@ -0,0 +1,5 @@
+numpy
+torch
+torchtext
+spacy
+# pip cannot execute a shell command like "!python -m spacy download en";
+# the model must be installed as a package instead (wheel version assumed)
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
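
With the requirements above installed, the tokenizer that app.py requests should load; a quick sanity check (illustrative, not part of the commit):

from torchtext.data.utils import get_tokenizer

tokenizer = get_tokenizer('spacy', language='en_core_web_sm')
print(tokenizer("Hello there, world!"))
# ['Hello', 'there', ',', 'world', '!']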
transformer.py ADDED
@@ -0,0 +1,220 @@
+# code taken from https://towardsdatascience.com/how-to-code-the-transformer-in-pytorch-24db27c8f9ec
+# and https://pytorch.org/tutorials/beginner/transformer_tutorial.html
+
+import torch
+import math
+import copy
+
+
+class Embedder(torch.nn.Module):
+    def __init__(self, vocab_size, d_model):
+        super().__init__()
+        self.embed = torch.nn.Embedding(vocab_size, d_model)
+
+    def forward(self, x):
+        return self.embed(x)
+
+
+class PositionalEncoder(torch.nn.Module):
+    def __init__(self, d_model, dropout=0.1, max_seq_len=80):
+        super().__init__()
+        self.dropout = torch.nn.Dropout(p=dropout)
+
+        position = torch.arange(max_seq_len).unsqueeze(1)
+        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
+        pe = torch.zeros(max_seq_len, 1, d_model)
+        pe[:, 0, 0::2] = torch.sin(position * div_term)
+        pe[:, 0, 1::2] = torch.cos(position * div_term)
+        # register_buffer saves pe with the model's state like a parameter,
+        # but it receives no gradients
+        self.register_buffer('pe', pe)
+
+    def forward(self, x):
+        # x is batch-first (bs, seq_len, d_model) throughout this repo, so the
+        # encoding is sliced by sequence length and broadcast over the batch
+        # (the tutorial's pe[:x.size(0)] indexed by batch size instead);
+        # note seq_len must stay <= max_seq_len
+        x = x + self.pe[:x.size(1)].transpose(0, 1)
+        return self.dropout(x)
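+# (illustration: row t of pe interleaves sin(t / 10000^(2i/d_model)) and
+# cos(t / 10000^(2i/d_model)), so every position gets a unique signature
+# that the attention layers can use to recover word order)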
+
+
+class MultiHeadAttention(torch.nn.Module):
+    def __init__(self, heads, d_model, dropout=0.1):
+        super().__init__()
+
+        self.d_model = d_model
+        self.d_k = d_model // heads
+        self.h = heads
+
+        self.q_linear = torch.nn.Linear(d_model, d_model)
+        self.v_linear = torch.nn.Linear(d_model, d_model)
+        self.k_linear = torch.nn.Linear(d_model, d_model)
+        self.dropout = torch.nn.Dropout(dropout)
+        self.out = torch.nn.Linear(d_model, d_model)
+
+    def forward(self, q, k, v, mask=None):
+        bs = q.size(0)
+
+        # perform linear operation and split into h heads
+        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
+        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
+        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
+
+        # transpose to get dimensions bs * h * sl * d_k
+        k = k.transpose(1, 2)
+        q = q.transpose(1, 2)
+        v = v.transpose(1, 2)
+
+        # calculate attention using the function defined below
+        scores = attention(q, k, v, self.d_k, mask, self.dropout)
+
+        # concatenate heads and put through final linear layer
+        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
+
+        output = self.out(concat)
+
+        return output
+
+
+def attention(q, k, v, d_k, mask=None, dropout=None):
+    # scaled dot-product attention over all heads at once
+    scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
+    if mask is not None:
+        mask = mask.unsqueeze(1)
+        scores = scores.masked_fill(mask == 0, -1e9)
+    scores = torch.nn.functional.softmax(scores, dim=-1)
+
+    if dropout is not None:
+        scores = dropout(scores)
+
+    output = torch.matmul(scores, v)
+    return output
+
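+# shape walk-through for one attention call (illustrative numbers:
+# bs=2, h=8, sl=10, d_k=64):
+#   q @ k^T / sqrt(64): (2, 8, 10, 64) x (2, 8, 64, 10) -> (2, 8, 10, 10)
+#   softmax over the last dim, then @ v:
+#   (2, 8, 10, 10) x (2, 8, 10, 64) -> (2, 8, 10, 64)
+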
+class FeedForward(torch.nn.Module):
+    def __init__(self, d_model, d_ff=2048, dropout=0.1):
+        super().__init__()
+        # d_ff defaults to 2048
+        self.linear_1 = torch.nn.Linear(d_model, d_ff)
+        self.dropout = torch.nn.Dropout(dropout)
+        self.linear_2 = torch.nn.Linear(d_ff, d_model)
+
+    def forward(self, x):
+        x = self.dropout(torch.nn.functional.relu(self.linear_1(x)))
+        x = self.linear_2(x)
+        return x
+
+
+class Norm(torch.nn.Module):
+    def __init__(self, d_model, eps=1e-6):
+        super().__init__()
+
+        self.size = d_model
+        # create two learnable parameters to calibrate normalization
+        self.alpha = torch.nn.Parameter(torch.ones(self.size))
+        self.bias = torch.nn.Parameter(torch.zeros(self.size))
+        self.eps = eps
+
+    def forward(self, x):
+        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
+        return norm
+
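+# note: this hand-rolled layer norm is functionally close to
+# torch.nn.LayerNorm(d_model); it is kept custom to match the tutorial
+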
+# build an encoder layer with one multi-head attention layer
+# and one feed-forward layer
+class EncoderLayer(torch.nn.Module):
+    def __init__(self, d_model, heads, dropout=0.1):
+        super().__init__()
+        self.norm_1 = Norm(d_model)
+        self.norm_2 = Norm(d_model)
+        self.attn = MultiHeadAttention(heads, d_model)
+        self.ff = FeedForward(d_model)
+        self.dropout_1 = torch.nn.Dropout(dropout)
+        self.dropout_2 = torch.nn.Dropout(dropout)
+
+    def forward(self, x, mask):
+        x2 = self.norm_1(x)
+        x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
+        x2 = self.norm_2(x)
+        x = x + self.dropout_2(self.ff(x2))
+        return x
+
+
+# build a decoder layer with two multi-head attention layers and
+# one feed-forward layer
+class DecoderLayer(torch.nn.Module):
+    def __init__(self, d_model, heads, dropout=0.1):
+        super().__init__()
+        self.norm_1 = Norm(d_model)
+        self.norm_2 = Norm(d_model)
+        self.norm_3 = Norm(d_model)
+
+        self.dropout_1 = torch.nn.Dropout(dropout)
+        self.dropout_2 = torch.nn.Dropout(dropout)
+        self.dropout_3 = torch.nn.Dropout(dropout)
+
+        self.attn_1 = MultiHeadAttention(heads, d_model)
+        self.attn_2 = MultiHeadAttention(heads, d_model)
+        self.ff = FeedForward(d_model)
+
+    def forward(self, x, e_outputs, src_mask, trg_mask):
+        # masked self-attention over the target sequence
+        x2 = self.norm_1(x)
+        x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
+        # cross-attention over the encoder outputs
+        x2 = self.norm_2(x)
+        x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
+        x2 = self.norm_3(x)
+        x = x + self.dropout_3(self.ff(x2))
+        return x
+
+
+# a convenient cloning function that generates N identical layers
+def get_clones(module, N):
+    return torch.nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
+
+
+class Encoder(torch.nn.Module):
+    def __init__(self, vocab_size, d_model, N, heads):
+        super().__init__()
+        self.N = N
+        self.embed = Embedder(vocab_size, d_model)
+        self.pe = PositionalEncoder(d_model)
+        self.layers = get_clones(EncoderLayer(d_model, heads), N)
+        self.norm = Norm(d_model)
+
+    def forward(self, src, mask):
+        x = self.embed(src)
+        x = self.pe(x)
+        for i in range(self.N):
+            x = self.layers[i](x, mask)
+        return self.norm(x)
+
+
+class Decoder(torch.nn.Module):
+    def __init__(self, vocab_size, d_model, N, heads):
+        super().__init__()
+        self.N = N
+        self.embed = Embedder(vocab_size, d_model)
+        self.pe = PositionalEncoder(d_model)
+        self.layers = get_clones(DecoderLayer(d_model, heads), N)
+        self.norm = Norm(d_model)
+
+    def forward(self, trg, e_outputs, src_mask, trg_mask):
+        x = self.embed(trg)
+        x = self.pe(x)
+        for i in range(self.N):
+            x = self.layers[i](x, e_outputs, src_mask, trg_mask)
+        return self.norm(x)
+
+
+class Transformer(torch.nn.Module):
+    def __init__(self, src_vocab, trg_vocab, d_model, N, heads):
+        super().__init__()
+        self.encoder = Encoder(src_vocab, d_model, N, heads)
+        self.decoder = Decoder(trg_vocab, d_model, N, heads)
+        self.out = torch.nn.Linear(d_model, trg_vocab)
+
+    def forward(self, src, trg, src_mask, trg_mask):
+        e_outputs = self.encoder(src, src_mask)
+        d_output = self.decoder(trg, e_outputs, src_mask, trg_mask)
+        output = self.out(d_output)
+        return output
+
+
+# we don't perform softmax on the output, as this is handled
+# automatically by our loss function
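
For reference, a quick shape check of the Transformer defined above (batch size, sequence lengths, and vocab size are illustrative, not from the commit):

import torch
from transformer import Transformer

model = Transformer(src_vocab=1000, trg_vocab=1000, d_model=512, N=6, heads=8)

src = torch.randint(0, 1000, (2, 10))   # (batch, src_len)
trg = torch.randint(0, 1000, (2, 7))    # (batch, trg_len)
src_mask = torch.ones(2, 1, 10, dtype=torch.bool)
trg_mask = torch.tril(torch.ones(7, 7, dtype=torch.bool)).unsqueeze(0)

out = model(src, trg, src_mask, trg_mask)
print(out.shape)  # torch.Size([2, 7, 1000])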