nickgardner committed on
Commit
238ab50
1 Parent(s): fc75f91

full func test 4

Files changed (3)
  1. app.py +49 -3
  2. requirements.txt +5 -0
  3. transformer.py +220 -0
app.py CHANGED
@@ -1,7 +1,53 @@
 import gradio as gr
-
-def greet(name):
-    return "Hello " + name + "!!"
-
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+import torch
+from torchtext.data.utils import get_tokenizer
+import numpy as np
+
+from huggingface_hub import hf_hub_download
+from transformer import Transformer
+
+MAX_LEN = 350
+
+tokenizer = get_tokenizer('spacy', language='en_core_web_sm')
+# hf_hub_download expects a bare repo id, not a full URL
+vocab = torch.load(hf_hub_download(repo_id="nickgardner/chatbot",
+                                   filename="vocab.pth"))
+vocab_token_dict = vocab.get_stoi()
+pad_token = vocab_token_dict['<pad>']
+unknown_token = vocab_token_dict['<unk>']
+sos_token = vocab_token_dict['<sos>']
+eos_token = vocab_token_dict['<eos>']
+text_pipeline = lambda x: vocab(tokenizer(x))
+
+d_model = 512
+heads = 8
+N = 6
+src_vocab = len(vocab)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = Transformer(len(vocab), len(vocab), d_model, N, heads).to(device)
+# hf_hub_download returns a local file path; the checkpoint must be
+# deserialized with torch.load before it can be passed to load_state_dict
+model.load_state_dict(torch.load(
+    hf_hub_download(repo_id="nickgardner/chatbot",
+                    filename="alpaca_train_380_epoch.pt"),
+    map_location=device))
+model.eval()
+
+@torch.no_grad()  # inference only; model.eval() was already set above
+def respond(custom_string):
+    src = torch.tensor(text_pipeline(custom_string), dtype=torch.int64).unsqueeze(0).to(device)
+    src_mask = ((src != pad_token) & (src != unknown_token)).unsqueeze(-2).to(device)
+    e_outputs = model.encoder(src, src_mask)
+
+    outputs = torch.zeros(MAX_LEN).type_as(src.data).to(device)
+    outputs[0] = sos_token
+    for i in range(1, MAX_LEN):
+        # "no-peek" mask: position i may attend only to positions <= i
+        trg_mask = np.triu(np.ones([1, i, i]), k=1).astype('uint8')
+        trg_mask = (torch.from_numpy(trg_mask) == 0).to(device)
+
+        out = model.out(model.decoder(outputs[:i].unsqueeze(0), e_outputs, src_mask, trg_mask))
+        out = torch.nn.functional.softmax(out, dim=-1)
+        # greedy decoding: keep only the single most likely next token
+        val, ix = out[:, -1].data.topk(1)
+
+        outputs[i] = ix[0][0]
+        if ix[0][0] == eos_token:
+            break
+    return ' '.join([vocab.get_itos()[ix] for ix in outputs[1:i]])
+
+iface = gr.Interface(fn=respond, inputs="text", outputs="text")
 iface.launch()
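
A note on the loop in respond above: the np.triu / == 0 pair builds the standard "no-peek" (causal) mask, so each target position can only attend to positions at or before it. A minimal standalone sketch of what it produces (the value of i is illustrative):

import numpy as np
import torch

i = 4  # pretend four target tokens exist so far
trg_mask = np.triu(np.ones([1, i, i]), k=1).astype('uint8')
trg_mask = torch.from_numpy(trg_mask) == 0
print(trg_mask.int())
# tensor([[[1, 0, 0, 0],
#          [1, 1, 0, 0],
#          [1, 1, 1, 0],
#          [1, 1, 1, 1]]], dtype=torch.int32)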
requirements.txt ADDED
@@ -0,0 +1,5 @@
+numpy
+torch
+torchtext
+spacy
+# pip cannot execute a shell command like "!python -m spacy download en";
+# the model must be installed as a package instead (wheel version assumed)
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
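
With the requirements above installed, the tokenizer that app.py requests should load; a quick sanity check (illustrative, not part of the commit):

from torchtext.data.utils import get_tokenizer

tokenizer = get_tokenizer('spacy', language='en_core_web_sm')
print(tokenizer("Hello there, world!"))
# ['Hello', 'there', ',', 'world', '!']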
transformer.py ADDED
@@ -0,0 +1,220 @@
+# code taken from https://towardsdatascience.com/how-to-code-the-transformer-in-pytorch-24db27c8f9ec
+# and https://pytorch.org/tutorials/beginner/transformer_tutorial.html
+
+import torch
+import math
+import copy
+
+
+class Embedder(torch.nn.Module):
+    def __init__(self, vocab_size, d_model):
+        super().__init__()
+        self.embed = torch.nn.Embedding(vocab_size, d_model)
+
+    def forward(self, x):
+        return self.embed(x)
+
+
+class PositionalEncoder(torch.nn.Module):
+    def __init__(self, d_model, dropout=0.1, max_seq_len=80):
+        super().__init__()
+        self.dropout = torch.nn.Dropout(p=dropout)
+
+        position = torch.arange(max_seq_len).unsqueeze(1)
+        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
+        pe = torch.zeros(max_seq_len, 1, d_model)
+        pe[:, 0, 0::2] = torch.sin(position * div_term)
+        pe[:, 0, 1::2] = torch.cos(position * div_term)
+        # register_buffer saves pe with the model's state like a parameter,
+        # but it receives no gradients
+        self.register_buffer('pe', pe)
+
+    def forward(self, x):
+        # x is batch-first (bs, seq_len, d_model) throughout this repo, so the
+        # encoding is sliced by sequence length and broadcast over the batch
+        # (the tutorial's pe[:x.size(0)] indexed by batch size instead);
+        # note seq_len must stay <= max_seq_len
+        x = x + self.pe[:x.size(1)].transpose(0, 1)
+        return self.dropout(x)
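+# (illustration: row t of pe interleaves sin(t / 10000^(2i/d_model)) and
+# cos(t / 10000^(2i/d_model)), so every position gets a unique signature
+# that the attention layers can use to recover word order)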
+
+
+class MultiHeadAttention(torch.nn.Module):
+    def __init__(self, heads, d_model, dropout=0.1):
+        super().__init__()
+
+        self.d_model = d_model
+        self.d_k = d_model // heads
+        self.h = heads
+
+        self.q_linear = torch.nn.Linear(d_model, d_model)
+        self.v_linear = torch.nn.Linear(d_model, d_model)
+        self.k_linear = torch.nn.Linear(d_model, d_model)
+        self.dropout = torch.nn.Dropout(dropout)
+        self.out = torch.nn.Linear(d_model, d_model)
+
+    def forward(self, q, k, v, mask=None):
+        bs = q.size(0)
+
+        # perform linear operation and split into h heads
+        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
+        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
+        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
+
+        # transpose to get dimensions bs * h * sl * d_k
+        k = k.transpose(1, 2)
+        q = q.transpose(1, 2)
+        v = v.transpose(1, 2)
+
+        # calculate attention using the function defined below
+        scores = attention(q, k, v, self.d_k, mask, self.dropout)
+
+        # concatenate heads and put through final linear layer
+        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
+
+        output = self.out(concat)
+
+        return output
+
+
+def attention(q, k, v, d_k, mask=None, dropout=None):
+    # scaled dot-product attention over all heads at once
+    scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
+    if mask is not None:
+        mask = mask.unsqueeze(1)
+        scores = scores.masked_fill(mask == 0, -1e9)
+    scores = torch.nn.functional.softmax(scores, dim=-1)
+
+    if dropout is not None:
+        scores = dropout(scores)
+
+    output = torch.matmul(scores, v)
+    return output
+
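+# shape walk-through for one attention call (illustrative numbers:
+# bs=2, h=8, sl=10, d_k=64):
+#   q @ k^T / sqrt(64): (2, 8, 10, 64) x (2, 8, 64, 10) -> (2, 8, 10, 10)
+#   softmax over the last dim, then @ v:
+#   (2, 8, 10, 10) x (2, 8, 10, 64) -> (2, 8, 10, 64)
+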
+class FeedForward(torch.nn.Module):
+    def __init__(self, d_model, d_ff=2048, dropout=0.1):
+        super().__init__()
+        # d_ff defaults to 2048
+        self.linear_1 = torch.nn.Linear(d_model, d_ff)
+        self.dropout = torch.nn.Dropout(dropout)
+        self.linear_2 = torch.nn.Linear(d_ff, d_model)
+
+    def forward(self, x):
+        x = self.dropout(torch.nn.functional.relu(self.linear_1(x)))
+        x = self.linear_2(x)
+        return x
+
+
+class Norm(torch.nn.Module):
+    def __init__(self, d_model, eps=1e-6):
+        super().__init__()
+
+        self.size = d_model
+        # create two learnable parameters to calibrate normalization
+        self.alpha = torch.nn.Parameter(torch.ones(self.size))
+        self.bias = torch.nn.Parameter(torch.zeros(self.size))
+        self.eps = eps
+
+    def forward(self, x):
+        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
+        return norm
+
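+# note: this hand-rolled layer norm is functionally close to
+# torch.nn.LayerNorm(d_model); it is kept custom to match the tutorial
+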
+# build an encoder layer with one multi-head attention layer
+# and one feed-forward layer
+class EncoderLayer(torch.nn.Module):
+    def __init__(self, d_model, heads, dropout=0.1):
+        super().__init__()
+        self.norm_1 = Norm(d_model)
+        self.norm_2 = Norm(d_model)
+        self.attn = MultiHeadAttention(heads, d_model)
+        self.ff = FeedForward(d_model)
+        self.dropout_1 = torch.nn.Dropout(dropout)
+        self.dropout_2 = torch.nn.Dropout(dropout)
+
+    def forward(self, x, mask):
+        x2 = self.norm_1(x)
+        x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
+        x2 = self.norm_2(x)
+        x = x + self.dropout_2(self.ff(x2))
+        return x
+
+
+# build a decoder layer with two multi-head attention layers and
+# one feed-forward layer
+class DecoderLayer(torch.nn.Module):
+    def __init__(self, d_model, heads, dropout=0.1):
+        super().__init__()
+        self.norm_1 = Norm(d_model)
+        self.norm_2 = Norm(d_model)
+        self.norm_3 = Norm(d_model)
+
+        self.dropout_1 = torch.nn.Dropout(dropout)
+        self.dropout_2 = torch.nn.Dropout(dropout)
+        self.dropout_3 = torch.nn.Dropout(dropout)
+
+        self.attn_1 = MultiHeadAttention(heads, d_model)
+        self.attn_2 = MultiHeadAttention(heads, d_model)
+        self.ff = FeedForward(d_model)
+
+    def forward(self, x, e_outputs, src_mask, trg_mask):
+        # masked self-attention over the target sequence
+        x2 = self.norm_1(x)
+        x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
+        # cross-attention over the encoder outputs
+        x2 = self.norm_2(x)
+        x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
+        x2 = self.norm_3(x)
+        x = x + self.dropout_3(self.ff(x2))
+        return x
+
+
+# a convenient cloning function that generates N identical layers
+def get_clones(module, N):
+    return torch.nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
+
+
+class Encoder(torch.nn.Module):
+    def __init__(self, vocab_size, d_model, N, heads):
+        super().__init__()
+        self.N = N
+        self.embed = Embedder(vocab_size, d_model)
+        self.pe = PositionalEncoder(d_model)
+        self.layers = get_clones(EncoderLayer(d_model, heads), N)
+        self.norm = Norm(d_model)
+
+    def forward(self, src, mask):
+        x = self.embed(src)
+        x = self.pe(x)
+        for i in range(self.N):
+            x = self.layers[i](x, mask)
+        return self.norm(x)
+
+
+class Decoder(torch.nn.Module):
+    def __init__(self, vocab_size, d_model, N, heads):
+        super().__init__()
+        self.N = N
+        self.embed = Embedder(vocab_size, d_model)
+        self.pe = PositionalEncoder(d_model)
+        self.layers = get_clones(DecoderLayer(d_model, heads), N)
+        self.norm = Norm(d_model)
+
+    def forward(self, trg, e_outputs, src_mask, trg_mask):
+        x = self.embed(trg)
+        x = self.pe(x)
+        for i in range(self.N):
+            x = self.layers[i](x, e_outputs, src_mask, trg_mask)
+        return self.norm(x)
+
+
+class Transformer(torch.nn.Module):
+    def __init__(self, src_vocab, trg_vocab, d_model, N, heads):
+        super().__init__()
+        self.encoder = Encoder(src_vocab, d_model, N, heads)
+        self.decoder = Decoder(trg_vocab, d_model, N, heads)
+        self.out = torch.nn.Linear(d_model, trg_vocab)
+
+    def forward(self, src, trg, src_mask, trg_mask):
+        e_outputs = self.encoder(src, src_mask)
+        d_output = self.decoder(trg, e_outputs, src_mask, trg_mask)
+        output = self.out(d_output)
+        return output
+
+
+# we don't perform softmax on the output, as this is handled
+# automatically by our loss function
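
For reference, a quick shape check of the Transformer defined above (batch size, sequence lengths, and vocab size are illustrative, not from the commit):

import torch
from transformer import Transformer

model = Transformer(src_vocab=1000, trg_vocab=1000, d_model=512, N=6, heads=8)

src = torch.randint(0, 1000, (2, 10))   # (batch, src_len)
trg = torch.randint(0, 1000, (2, 7))    # (batch, trg_len)
src_mask = torch.ones(2, 1, 10, dtype=torch.bool)
trg_mask = torch.tril(torch.ones(7, 7, dtype=torch.bool)).unsqueeze(0)

out = model(src, trg, src_mask, trg_mask)
print(out.shape)  # torch.Size([2, 7, 1000])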