"""Minimal Gradio chat UI backed by a local MKLLM GGUF model via llama-cpp-python."""
import gradio as gr
from llama_cpp import Llama

# Load the quantized model once at startup; n_ctx bounds the prompt window.
llm = Llama(
    model_path="mkllm-7b-instruct-q4_0.gguf",
    n_ctx=2048,
)


def chat(message, history):
    """Generate a reply to *message*, conditioning on the prior conversation.

    Args:
        message: The latest user message.
        history: Prior turns as supplied by ``gr.ChatInterface``. Depending on
            the Gradio version this is either a list of ``[user, assistant]``
            pairs or a list of ``{"role", "content"}`` dicts; both are handled.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    # Rebuild the USER:/ASSISTANT: transcript so the model actually sees
    # context — the original implementation silently discarded `history`.
    parts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages" format: {"role": ..., "content": ...}
            role = "USER" if turn.get("role") == "user" else "ASSISTANT"
            parts.append(f"{role}: {turn.get('content', '')}")
        else:
            # Legacy tuple format: [user_message, assistant_message]
            user_msg, bot_msg = turn[0], turn[1]
            if user_msg:
                parts.append(f"USER: {user_msg}")
            if bot_msg:
                parts.append(f"ASSISTANT: {bot_msg}")
    parts.append(f"USER: {message}\nASSISTANT:")
    prompt = "\n".join(parts)

    response = llm.create_completion(
        prompt,
        max_tokens=512,
        temperature=0.7,
        # Stop before the model invents the next user turn.
        stop=["USER:"],
    )
    return response['choices'][0]['text'].strip()


demo = gr.ChatInterface(
    chat,
    title="MKLLM Chat",
)

if __name__ == "__main__":
    # Bind to all interfaces so the UI is reachable from other hosts.
    demo.launch(server_name="0.0.0.0", server_port=7860)