"""Gradio chat demo backed by a Hugging Face text-generation pipeline."""

import gradio as gr
from transformers import pipeline
import torch

# Speaker labels used in the plain-text transcript that is fed to the model.
_USER_PREFIX = "User:"
_ASSISTANT_PREFIX = "Assistant:"

# The model is loaded once at import time, with weights in float16.
pipe = pipeline(
    "text-generation",
    model="cognitivecomputations/dolphin-2.9.4-llama3.1-8b",
    torch_dtype=torch.float16,
)


def _build_prompt(system_message, history, message):
    """Return the transcript prompt: system text, past turns, new message."""
    lines = [system_message]
    for user_message, assistant_message in history:
        # Either side of a turn may be empty/None; skip it in that case.
        if user_message:
            lines.append(f"{_USER_PREFIX} {user_message}")
        if assistant_message:
            lines.append(f"{_ASSISTANT_PREFIX} {assistant_message}")
    lines.append(f"{_USER_PREFIX} {message}")
    # End on an open assistant turn so the model answers the user instead of
    # continuing the user's own message.
    lines.append(_ASSISTANT_PREFIX)
    return "\n".join(lines)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate the assistant reply for *message* and stream it to the UI.

    Args:
        message: The latest user message.
        history: Earlier ``(user_message, assistant_message)`` pairs.
        system_message: Text placed at the top of the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature passed to the pipeline.
        top_p: Nucleus-sampling threshold passed to the pipeline.

    Yields:
        Progressively longer prefixes of the reply, one character at a time.
        The text is fully generated before the first yield, so this only
        simulates streaming in the chat window.
    """
    prompt = _build_prompt(system_message, history, message)

    generated = pipe(
        prompt,
        # Bound only the completion. ``max_length`` would also count the
        # prompt tokens, leaving no room to generate once the history grows.
        max_new_tokens=int(max_tokens),
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        # Have the pipeline drop the prompt itself rather than splitting the
        # output on the prompt string, which breaks when the decoded text
        # does not reproduce the prompt exactly.
        return_full_text=False,
    )[0]["generated_text"]

    # Keep only the assistant's turn if the model goes on to invent the
    # next user message.
    reply = generated.split(f"\n{_USER_PREFIX}", 1)[0].strip()

    for end in range(1, len(reply) + 1):
        yield reply[:end]


# Chat UI: the extra inputs are passed to ``respond`` after ``message`` and
# ``history``, in the order listed here.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()