"""Minimal Gradio front end for a local Mistral-7B-Instruct GGUF model."""

import gradio as gr
from llama_cpp import Llama

# Model and generation settings, kept in one place so they are easy to tune.
MODEL_PATH = "./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
CONTEXT_SIZE = 2048
MAX_NEW_TOKENS = 200
# The original passed stop=[""]. An empty string is a substring of any text,
# so it can terminate generation immediately with empty output. "</s>" is the
# end-of-sequence marker used by Mistral models.
# NOTE(review): confirm this is the stop sequence that was intended.
STOP_SEQUENCES = ["</s>"]

# Initialize the Llama model with GGUF format (loaded once at import time).
llm = Llama(model_path=MODEL_PATH, n_ctx=CONTEXT_SIZE)


def generate_response(prompt: str) -> str:
    """Return the model's completion text for *prompt*.

    Args:
        prompt: Raw text sent to the model as-is.

    Returns:
        The ``text`` field of the first completion choice, or an empty
        string when *prompt* is empty or contains only whitespace.
    """
    # Skip the (expensive) model call when there is nothing to complete.
    if not prompt or not prompt.strip():
        return ""
    output = llm(prompt, max_tokens=MAX_NEW_TOKENS, stop=STOP_SEQUENCES)
    return output["choices"][0]["text"]


def chat(prompt: str) -> str:
    """Gradio callback: forward the user's text to ``generate_response``."""
    return generate_response(prompt)


# Creating the Gradio interface. ``live=True`` was dropped: it re-runs the
# callback on every input change, i.e. one full generation per keystroke.
iface = gr.Interface(fn=chat, inputs="text", outputs="text")

if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    iface.launch()