import sys, os

# fetch minGPT and put it on the import path
# (os.system runs a real shell command, so the notebook-style "!" prefix is dropped)
os.system("git clone https://github.com/karpathy/minGPT/")
sys.path.append('minGPT')

from transformers import GPT2Tokenizer, GPT2LMHeadModel
from mingpt.model import GPT
from mingpt.utils import set_seed

use_mingpt = True # use minGPT or huggingface/transformers model?
model_type = 'gpt2-xl'
device = 'cuda'

if use_mingpt:
    model = GPT.from_pretrained(model_type)
else:
    model = GPT2LMHeadModel.from_pretrained(model_type)
    model.config.pad_token_id = model.config.eos_token_id # suppress a warning

# ship model to device and set to eval mode
model.to(device)
model.eval()

def generate(prompt='', num_samples=10, steps=20, do_sample=True):
    # tokenize the input prompt into an integer input sequence
    tokenizer = GPT2Tokenizer.from_pretrained(model_type)
    if prompt == '':
        # to create unconditional samples we feed in the special start token
        prompt = '<|endoftext|>'
    encoded_input = tokenizer(prompt, return_tensors='pt').to(device)
    x = encoded_input['input_ids']

    # replicate the prompt so all samples are drawn in one batch
    x = x.expand(num_samples, -1)

    # forward the model `steps` times to get samples, in a batch
    y = model.generate(x, max_new_tokens=steps, do_sample=do_sample, top_k=40)

    # decode and print each sample, and collect them so callers can use the text
    samples = []
    for i in range(num_samples):
        out = tokenizer.decode(y[i].cpu().squeeze())
        print('-'*80)
        print(out)
        samples.append(out)
    return samples

def infer(input):
    # three samples back, one per gradio output box
    return generate(prompt=input, num_samples=3, steps=20)

import gradio as gr
gr.Interface(infer, "text", ["text", "text", "text"],
             examples=["I was commuting to my Silicon Valley job when I took a wrong turn. I"]).launch()
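
# Usage note (a sketch, assuming a CUDA-capable machine with the dependencies
# installed, e.g. `pip install torch transformers gradio`): running this script
# clones minGPT, downloads the gpt2-xl weights (roughly 6 GB), and serves the
# demo at the local URL that .launch() prints. For a quick smoke test without
# the UI, generate() can also be called directly, e.g.:
#
#   generate(prompt='The meaning of life is', num_samples=3, steps=20)
#
# (the prompt above is illustrative, not from the original demo)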