import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = "cognitivecomputations/dolphin-2.7-mixtral-8x7b"

tokenizer = AutoTokenizer.from_pretrained(model_path)
# Mixtral-8x7B is large; load the weights in half precision and let
# accelerate spread layers across the available devices (device_map="auto"
# requires the accelerate package).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


def chat(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    # top_p/top_k only take effect when sampling is enabled, so use
    # do_sample=True instead of beam search (where they would be ignored).
    # max_new_tokens bounds the generated text rather than prompt + output.
    output = model.generate(
        input_ids,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.9,
        top_k=50,
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response


demo = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(placeholder="Enter your message here", lines=5),
    outputs=gr.Textbox(label="Bot's Response", lines=5),
)

if __name__ == "__main__":
    demo.launch()
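
# --- Optional: memory-constrained loading (illustrative sketch) ---
# Even in bfloat16, Mixtral-8x7B needs on the order of 90 GB of memory,
# which exceeds most single GPUs. One common alternative, assuming the
# bitsandbytes package is installed, is 4-bit quantized loading. This is
# not part of the original demo; the configuration values below are
# assumptions, not tuned recommendations.
#
# from transformers import BitsAndBytesConfig
#
# quant_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     quantization_config=quant_config,
#     device_map="auto",
# )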