import os

import tiktoken
import torch
from torch.nn import functional as F
import gradio as gr

from model import GPTConfig, GPT

# Pick the best available device: CUDA, then Apple Silicon (MPS), then CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"
print(f"using device: {device}")

modelpath = '.'

# GPT-2 BPE tokenizer from tiktoken.
enc = tiktoken.get_encoding('gpt2')

# Load the trained checkpoint and rebuild the model from its saved config.
ckpt_path = os.path.join(modelpath, 'GPT2ShakespeareModel.pt')
print(ckpt_path)
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint['model_args'])
model = GPT(gptconf)

# Strip the '_orig_mod.' prefix that torch.compile adds to state-dict keys.
state_dict = checkpoint['model']
unwanted_prefix = '_orig_mod.'
for k, v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  # inference only: disables dropout
model = torch.compile(model)  # optional speed-up; requires PyTorch 2.x


def generateText(inputText="JULIET\n", num_tokens=500):
    # Encode the prompt and shape it as a batch of one sequence: (1, T).
    start_tokens = enc.encode(inputText)
    x = torch.tensor(start_tokens).view(1, -1).to(device)

    # Sample autoregressively until the sequence reaches num_tokens tokens
    # (the prompt counts towards the total).
    while x.size(1) < num_tokens:
        # Crop the context to the model's block size so long generations
        # never exceed the context window.
        x_cond = x if x.size(1) <= gptconf.block_size else x[:, -gptconf.block_size:]
        # forward the model to get the logits
        with torch.no_grad():
            logits = model(x_cond)[0]  # (B, T, vocab_size)
        # take the logits at the last position
        logits = logits[:, -1, :]  # (B, vocab_size)
        # get the probabilities
        probs = F.softmax(logits, dim=-1)
        # do top-k sampling of 50 (the huggingface pipeline default);
        # topk_probs and topk_indices are both (1, 50) here since B = 1
        topk_probs, topk_indices = torch.topk(probs, 50, dim=-1)
        # select a token from the top-k probabilities
        # note: multinomial does not demand the input to sum to 1
        ix = torch.multinomial(topk_probs, 1)  # (B, 1)
        # gather the corresponding token ids
        xcol = torch.gather(topk_indices, -1, ix)  # (B, 1)
        # append to the sequence
        x = torch.cat((x, xcol), dim=1)

    # Decode and return the generated text (prompt included).
    tokens = x[0, :num_tokens].tolist()
    return enc.decode(tokens)


title = "GPT from scratch (GPT-2 BPE tokenizer) generating Shakespeare-style text"
description = "GPT trained from scratch, using the tiktoken GPT-2 BPE tokenizer, to generate text in the style of its training data"

examples = [
    ["ROMEO:\nWith love's light wings did I o'er-perch these walls;\nFor stony limits cannot hold love out,\nAnd what love can do that dares love attempt;\nTherefore thy kinsmen are no let to me.\n", 500],
    ["ROMEO:\n", 500],
    ["JULIET:\n", 500],
    ["CAPULET:\nWhy, how now, kinsman! wherefore storm you so?\n", 500],
    ["KING RICHARD II:\nAy, hand from hand, my love, and heart from heart.\nAnd", 500],
    ["KING RICHARD II:\n", 500],
    ["CAPULET:\n", 500],
    ["QUEEN:\nBanish us both and send the king with me.\nAnd", 500],
    ["QUEEN:\n", 500],
    ["CORIOLANUS:\n", 500],
    ["MENENIUS:\n", 500],
]

demo = gr.Interface(
    generateText,
    inputs=[
        gr.Textbox(label="Starting text"),
        gr.Slider(100, 2000, value=500, step=100, label="Number of tokens that you want in your output"),
    ],
    outputs=[
        gr.Text(),
    ],
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
)

demo.launch()
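
# ----------------------------------------------------------------------------
# Alternative sampling sketch (assumption): if the local model.py follows
# nanoGPT, the GPT class also exposes a generate() helper with the signature
# generate(idx, max_new_tokens, temperature=1.0, top_k=None), and the manual
# top-k loop in generateText above could be replaced by a single call. The
# helper below is hypothetical and kept commented out; it reuses enc, model,
# and device as defined above.
#
# def generateWithHelper(inputText="JULIET\n", num_tokens=500):
#     # Encode the prompt as a (1, T) batch on the right device.
#     start_tokens = torch.tensor(enc.encode(inputText), device=device).view(1, -1)
#     with torch.no_grad():
#         # Delegate cropping, softmax, and top-k sampling to the model.
#         out = model.generate(start_tokens, max_new_tokens=num_tokens, top_k=50)
#     return enc.decode(out[0].tolist())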