karanthacker committed on
Commit be34a4f
1 Parent(s): 4f2f464

update ui and model

Files changed (2)
  1. app.py +58 -3
  2. transformer.py +193 -0
app.py CHANGED
@@ -1,7 +1,62 @@
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"

- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
  iface.launch()
  import gradio as gr
+ import torch
+ from torchtext.data.utils import get_tokenizer
+ import numpy as np
+ import subprocess

+ from huggingface_hub import hf_hub_download
+ from transformer import Transformer

+ model_url = "https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl"
+ subprocess.run(["pip", "install", model_url])
+
+ MAX_LEN = 350
+
+ tokenizer = get_tokenizer('spacy', language='en_core_web_sm')
+ vocab = torch.load(hf_hub_download(repo_id="karanthacker/chat_ai",
+                                    filename="vocab.pth"))
+ vocab_token_dict = vocab.get_stoi()
+ indices_to_tokens = vocab.get_itos()
+ pad_token = vocab_token_dict['<pad>']
+ unknown_token = vocab_token_dict['<unk>']
+ sos_token = vocab_token_dict['<sos>']
+ eos_token = vocab_token_dict['<eos>']
+ text_pipeline = lambda x: vocab(tokenizer(x))
+
+ d_model = 512
+ heads = 8
+ N = 6
+ src_vocab = len(vocab)
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model = Transformer(len(vocab), len(vocab), d_model, N, heads).to(device)
+ model.load_state_dict(torch.load(hf_hub_download(repo_id="karanthacker/chat_ai",
+                                                  filename="alpaca_weights.pt"), map_location=device))
+ model.eval()
+
+ def respond(input_text):
+     model.eval()
+     src = torch.tensor(text_pipeline(input_text), dtype=torch.int64).unsqueeze(0).to(device)
+     src_mask = ((src != pad_token) & (src != unknown_token)).unsqueeze(-2).to(device)
+     e_outputs = model.encoder(src, src_mask)
+
+     outputs = torch.zeros(MAX_LEN).type_as(src.data).to(device)
+     outputs[0] = torch.tensor([sos_token])
+     for i in range(1, MAX_LEN):
+         trg_mask = np.triu(np.ones([1, i, i]), k=1).astype('uint8')
+         trg_mask = (torch.from_numpy(trg_mask) == 0).to(device)
+
+         out = model.out(model.decoder(outputs[:i].unsqueeze(0), e_outputs, src_mask, trg_mask))
+
+         out = torch.nn.functional.softmax(out, dim=-1)
+         val, ix = out[:, -1].data.topk(1)
+
+         outputs[i] = ix[0][0]
+         if ix[0][0] == eos_token:
+             break
+
+     return ' '.join([indices_to_tokens[ix] for ix in outputs[1:i]])
+
+ iface = gr.Interface(fn=respond,
+                      inputs="text",
+                      outputs="text")
  iface.launch()
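
For reference, a minimal sketch of the greedy decoding pattern used in respond() above, exercised against a toy stand-in for the model so it runs without the hosted weights. The names toy_vocab and fake_logits are hypothetical and not part of this commit; the real loop differs only in that the next-token scores come from model.out(model.decoder(...)) under the causal trg_mask.

import torch

# Toy vocabulary with the same special tokens as the app's vocab.
toy_vocab = ['<pad>', '<unk>', '<sos>', '<eos>', 'hello', 'world']
SOS, EOS = 2, 3
MAX_LEN = 10

def fake_logits(prefix):
    # Stand-in for model.out(model.decoder(...)): always prefers
    # "hello", then "world", then "<eos>".
    step = prefix.size(1)
    logits = torch.full((1, len(toy_vocab)), -1e9)
    logits[0, [4, 5, EOS][min(step - 1, 2)]] = 0.0
    return logits

outputs = torch.full((MAX_LEN,), 0, dtype=torch.long)
outputs[0] = SOS
for i in range(1, MAX_LEN):
    logits = fake_logits(outputs[:i].unsqueeze(0))
    ix = logits.argmax(dim=-1)        # greedy pick, same effect as topk(1) above
    outputs[i] = ix[0]
    if outputs[i] == EOS:             # stop once end-of-sequence is generated
        break

print(' '.join(toy_vocab[t] for t in outputs[1:i]))  # -> "hello world"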
transformer.py ADDED
@@ -0,0 +1,193 @@
+ import torch
+ import math
+ import copy
+
+ class Embedder(torch.nn.Module):
+     def __init__(self, vocab_size, d_model):
+         super().__init__()
+         self.embed = torch.nn.Embedding(vocab_size, d_model)
+     def forward(self, x):
+         return self.embed(x)
+
+ class PositionalEncoder(torch.nn.Module):
+     def __init__(self, d_model, dropout=0.1, max_seq_len=80):
+         super().__init__()
+         self.dropout = torch.nn.Dropout(p=dropout)
+
+         position = torch.arange(max_seq_len).unsqueeze(1)
+         div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
+         pe = torch.zeros(max_seq_len, 1, d_model)
+         pe[:, 0, 0::2] = torch.sin(position * div_term)
+         pe[:, 0, 1::2] = torch.cos(position * div_term)
+         self.register_buffer('pe', pe)  # notifies PyTorch that this value should be saved like a model parameter but should not have gradients
+
+     def forward(self, x):
+         x = x + self.pe[:x.size(0)]
+         return self.dropout(x)
+
+ class MultiHeadAttention(torch.nn.Module):
+     def __init__(self, heads, d_model, dropout=0.1):
+         super().__init__()
+
+         self.d_model = d_model
+         self.d_k = d_model // heads
+         self.h = heads
+
+         self.q_linear = torch.nn.Linear(d_model, d_model)
+         self.v_linear = torch.nn.Linear(d_model, d_model)
+         self.k_linear = torch.nn.Linear(d_model, d_model)
+         self.dropout = torch.nn.Dropout(dropout)
+         self.out = torch.nn.Linear(d_model, d_model)
+
+     def forward(self, q, k, v, mask=None):
+         bs = q.size(0)
+
+         # perform linear operation and split into h heads
+         k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
+         q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
+         v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
+
+         # transpose to get dimensions bs * h * sl * d_k
+         k = k.transpose(1, 2)
+         q = q.transpose(1, 2)
+         v = v.transpose(1, 2)
+
+         # calculate attention using the function defined below
+         scores = attention(q, k, v, self.d_k, mask, self.dropout)
+
+         # concatenate heads and put through final linear layer
+         concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
+
+         output = self.out(concat)
+
+         return output
+
+ def attention(q, k, v, d_k, mask=None, dropout=None):
+     scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
+     if mask is not None:
+         mask = mask.unsqueeze(1)
+         scores = scores.masked_fill(mask == 0, -1e9)
+     scores = torch.nn.functional.softmax(scores, dim=-1)
+
+     if dropout is not None:
+         scores = dropout(scores)
+
+     output = torch.matmul(scores, v)
+     return output
+
+ class FeedForward(torch.nn.Module):
+     def __init__(self, d_model, d_ff=2048, dropout=0.1):
+         super().__init__()
+         # We set d_ff to a default of 2048
+         self.linear_1 = torch.nn.Linear(d_model, d_ff)
+         self.dropout = torch.nn.Dropout(dropout)
+         self.linear_2 = torch.nn.Linear(d_ff, d_model)
+     def forward(self, x):
+         x = self.dropout(torch.nn.functional.relu(self.linear_1(x)))
+         x = self.linear_2(x)
+         return x
+
+ class Norm(torch.nn.Module):
+     def __init__(self, d_model, eps=1e-6):
+         super().__init__()
+
+         self.size = d_model
+         # create two learnable parameters to calibrate normalization
+         self.alpha = torch.nn.Parameter(torch.ones(self.size))
+         self.bias = torch.nn.Parameter(torch.zeros(self.size))
+         self.eps = eps
+
+     def forward(self, x):
+         norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
+         return norm
+
+ # encoder layer with one multi-head attention layer and one feed-forward layer
+ class EncoderLayer(torch.nn.Module):
+     def __init__(self, d_model, heads, dropout=0.1):
+         super().__init__()
+         self.norm_1 = Norm(d_model)
+         self.norm_2 = Norm(d_model)
+         self.attn = MultiHeadAttention(heads, d_model)
+         self.ff = FeedForward(d_model)
+         self.dropout_1 = torch.nn.Dropout(dropout)
+         self.dropout_2 = torch.nn.Dropout(dropout)
+
+     def forward(self, x, mask):
+         x2 = self.norm_1(x)
+         x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
+         x2 = self.norm_2(x)
+         x = x + self.dropout_2(self.ff(x2))
+         return x
+
+ # build a decoder layer with two multi-head attention layers and
+ # one feed-forward layer
+ class DecoderLayer(torch.nn.Module):
+     def __init__(self, d_model, heads, dropout=0.1):
+         super().__init__()
+         self.norm_1 = Norm(d_model)
+         self.norm_2 = Norm(d_model)
+         self.norm_3 = Norm(d_model)
+
+         self.dropout_1 = torch.nn.Dropout(dropout)
+         self.dropout_2 = torch.nn.Dropout(dropout)
+         self.dropout_3 = torch.nn.Dropout(dropout)
+
+         self.attn_1 = MultiHeadAttention(heads, d_model)
+         self.attn_2 = MultiHeadAttention(heads, d_model)
+         self.ff = FeedForward(d_model)
+     def forward(self, x, e_outputs, src_mask, trg_mask):
+         x2 = self.norm_1(x)
+         x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
+         x2 = self.norm_2(x)
+         x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs,
+                                            src_mask))
+         x2 = self.norm_3(x)
+         x = x + self.dropout_3(self.ff(x2))
+         return x
+
+ # generate multiple layers
+ def get_clones(module, N):
+     return torch.nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+
+ class Encoder(torch.nn.Module):
+     def __init__(self, vocab_size, d_model, N, heads):
+         super().__init__()
+         self.N = N
+         self.embed = Embedder(vocab_size, d_model)
+         self.pe = PositionalEncoder(d_model)
+         self.layers = get_clones(EncoderLayer(d_model, heads), N)
+         self.norm = Norm(d_model)
+     def forward(self, src, mask):
+         x = self.embed(src)
+         x = self.pe(x)
+         for i in range(self.N):
+             x = self.layers[i](x, mask)
+         return self.norm(x)
+
+ class Decoder(torch.nn.Module):
+     def __init__(self, vocab_size, d_model, N, heads):
+         super().__init__()
+         self.N = N
+         self.embed = Embedder(vocab_size, d_model)
+         self.pe = PositionalEncoder(d_model)
+         self.layers = get_clones(DecoderLayer(d_model, heads), N)
+         self.norm = Norm(d_model)
+     def forward(self, trg, e_outputs, src_mask, trg_mask):
+         x = self.embed(trg)
+         x = self.pe(x)
+         for i in range(self.N):
+             x = self.layers[i](x, e_outputs, src_mask, trg_mask)
+         return self.norm(x)
+
+ class Transformer(torch.nn.Module):
+     def __init__(self, src_vocab, trg_vocab, d_model, N, heads):
+         super().__init__()
+         self.encoder = Encoder(src_vocab, d_model, N, heads)
+         self.decoder = Decoder(trg_vocab, d_model, N, heads)
+         self.out = torch.nn.Linear(d_model, trg_vocab)
+     def forward(self, src, trg, src_mask, trg_mask):
+         e_outputs = self.encoder(src, src_mask)
+         d_output = self.decoder(trg, e_outputs, src_mask, trg_mask)
+         output = self.out(d_output)
+         return output
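
For a quick shape check, a minimal sketch (assuming transformer.py above is importable) that runs a small Transformer on random token ids with the mask shapes the code expects. The dimensions here are arbitrary smoke-test values, not the ones used for the hosted weights.

import numpy as np
import torch
from transformer import Transformer

# Tiny configuration for a smoke test (hypothetical sizes).
vocab_size, d_model, N, heads = 100, 32, 2, 4
model = Transformer(vocab_size, vocab_size, d_model, N, heads)

src = torch.randint(1, vocab_size, (1, 7))   # (batch, src_len)
trg = torch.randint(1, vocab_size, (1, 5))   # (batch, trg_len)

# src_mask: (batch, 1, src_len), broadcast over every query position.
# trg_mask: (batch, trg_len, trg_len) causal mask, as built in app.py.
src_mask = torch.ones(1, 1, 7, dtype=torch.bool)
trg_mask = torch.from_numpy(np.triu(np.ones((1, 5, 5)), k=1) == 0)

out = model(src, trg, src_mask, trg_mask)
print(out.shape)  # torch.Size([1, 5, 100]) — per-token scores over the target vocabulary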