import transformers
import torch
import gradio as gr


def get_model(model_name, model_path='pytorch_model.bin'):
    """Load the tokenizer and OPT model, then apply fine-tuned weights from model_path."""
    tokenizer = transformers.GPT2Tokenizer.from_pretrained(model_name)
    model = transformers.OPTForCausalLM.from_pretrained(model_name)
    # Load the fine-tuned checkpoint on CPU so no GPU is required at startup.
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()
    return model, tokenizer


def predict(text, model, tokenizer, n_beams=5, temperature=2.5, top_p=0.8, length_of_generated=300):
    """Generate a continuation of `text` using beam-search multinomial sampling."""
    text += '\n'
    input_ids = tokenizer.encode(text, return_tensors='pt')
    length_of_prompt = len(input_ids[0])
    with torch.no_grad():
        out = model.generate(
            input_ids,
            do_sample=True,
            num_beams=n_beams,
            temperature=temperature,
            top_p=top_p,
            max_length=length_of_prompt + length_of_generated,
            eos_token_id=tokenizer.eos_token_id,
        )
    # `generate` returns a batch of token-id sequences; decode and return the first one.
    return tokenizer.decode(out[0])


model, tokenizer = get_model('big-kek/NeuroSkeptic', 'OPT13b-skeptic.bin')

example = 'Who is Bill Gates really?'

demo = gr.Interface(
    # Gradio passes only the textbox value to fn, so bind model and tokenizer
    # here; passing `predict` directly would fail with missing positional arguments.
    fn=lambda text: predict(text, model, tokenizer),
    inputs=[
        gr.components.Textbox(label="what is your interest?", value=example),
    ],
    outputs=[
        gr.components.Textbox(label="oh! my ...", interactive=False),
    ],
)
demo.launch()
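# Optional usage sketch (illustrative, not part of the app): to exercise the
# generator without the web UI, call predict() directly. This assumes the
# OPT13b-skeptic.bin checkpoint is available locally and would be run in place
# of demo.launch(), which blocks while the server is up:
#
#   output = predict('Who is Bill Gates really?', model, tokenizer)
#   print(output)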