"""Minimal local chat UI for the MKLLM-7B-Instruct model via llama.cpp + Gradio."""

import gradio as gr
from llama_cpp import Llama

# Load the quantized model once at startup; n_ctx bounds the prompt window.
llm = Llama(
    model_path="MKLLM-7B-Instruct-Q4_0.gguf",
    n_ctx=2048,
)


def chat(message, history):
    """Generate an assistant reply for *message*, conditioned on *history*.

    Parameters
    ----------
    message : str
        The user's latest input.
    history : list
        Prior turns supplied by ``gr.ChatInterface``. Assumed to be
        ``[user, assistant]`` pairs (Gradio's tuple format) — TODO confirm
        against the installed Gradio version; "messages"-format dicts are
        also tolerated below.

    Returns
    -------
    str
        The model's completion text.
    """
    # Fold the conversation history into the prompt so the model actually
    # has context. (The original ignored `history` entirely — each turn was
    # answered in isolation.)
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: {"role": ..., "content": ...}
            role = "USER" if turn.get("role") == "user" else "ASSISTANT"
            parts.append(f"{role}: {turn.get('content', '')}")
        else:
            # tuple format: (user_msg, assistant_msg)
            user_msg, assistant_msg = turn
            parts.append(f"USER: {user_msg}")
            parts.append(f"ASSISTANT: {assistant_msg}")
    parts.append(f"USER: {message}")
    parts.append("ASSISTANT:")
    prompt = "\n".join(parts)

    response = llm.create_completion(
        prompt,
        max_tokens=512,
        temperature=0.7,
        # Stop before the model starts hallucinating the next user turn.
        stop=["USER:"],
    )
    return response["choices"][0]["text"]


demo = gr.ChatInterface(
    chat,
    title="MKLLM Chat",
)

# Guard the launch so importing this module doesn't start the server.
if __name__ == "__main__":
    demo.launch()