from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Conversation prefix; <|STK_SP|> is the turn separator the model was trained on.
prologue = """quality: high

[System]
Assistant is a distilled language model trained by the community.<|STK_SP|>

[System]
<|STK_SP|>

[User]"""

tokenizer = AutoTokenizer.from_pretrained("mrsteyk/openchatgpt-neox-125m", use_fast=True)
model = AutoModelForCausalLM.from_pretrained("mrsteyk/openchatgpt-neox-125m")


def chat(inpt, max_new_tokens, top_k, top_p, temperature, repetition_penalty):
    # Append the user message to the prefix and cue the assistant turn.
    inputs = tokenizer(f"{prologue}\n{inpt}<|STK_SP|>\n\n[Assistant]\n", return_tensors="pt").input_ids
    outputs = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        eos_token_id=tokenizer.sep_token_id,  # stop generation at the <|STK_SP|> separator
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )
    # Decode only the newly generated tokens, dropping the prompt.
    return tokenizer.batch_decode([i[len(inputs[0]):] for i in outputs], skip_special_tokens=True)[0]


gr.Interface(
    fn=chat,
    inputs=[
        "textbox",                               # user message
        gr.Slider(767, 2048 + 1),                # max_new_tokens
        gr.Slider(0, 100, value=50),             # top_k
        gr.Slider(0, 1, step=0.01, value=0.95),  # top_p
        gr.Slider(0.01, 1, step=0.01, value=1),  # temperature
        gr.Slider(1, 100, step=0.5),             # repetition_penalty
    ],
    outputs=[gr.Textbox(label="Assistant says")],
    examples=[
        ["Hello, I have a question about American history. Who is the current Vice President of the United States?", 767, 50, 0.95, 1, 1],
        ["Hello, I have a question about quantum computing. Can quantum computers solve NP-complete problems in polynomial time?", 767, 50, 0.95, 1, 1],
        ["I'm wondering how to make an apple pie?", 767, 50, 0.95, 1, 1],
        ["Hi, I want to know about the GPT-3 model. Could you provide me some information about it?", 767, 50, 0.95, 1, 1],
        ["Please, help me understand LLMs!", 767, 50, 0.95, 1, 1],
        ["What is the meaning of life?", 767, 50, 0.95, 1, 1],
        ["What is the origin of the word 'sushi'?", 767, 50, 0.95, 1, 1],
        ["What's the difference between a chatbot and an AI?", 767, 50, 0.95, 1, 1],
        ["What's the difference between a monad and a functor in functional programming?", 767, 50, 0.95, 1, 1],
    ],
    cache_examples=False,
).launch()