import gradio as gr
from llama_cpp import Llama

# Load the GGUF model file once, at import time. The resulting `llm` object is
# a module-level global that the functions below call for every request.
llm = Llama(
    model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    n_ctx=2048,
)

# Function to generate response
def generate_response(prompt: str) -> str:
    """Return the model's completion for ``prompt``.

    Args:
        prompt: Text passed unchanged to the module-level ``llm`` callable.

    Returns:
        The ``text`` field of the first entry in the completion's
        ``choices`` list, or an empty string when ``prompt`` is ``None``,
        empty, or contains only whitespace.
    """
    # A blank prompt gives the model nothing to complete; return early rather
    # than paying for a full generation (or passing ``None`` to the model).
    if not prompt or not prompt.strip():
        return ""

    # NOTE(review): the prompt is sent without any chat/instruction template.
    # The model file name suggests an instruct-tuned model, which may expect
    # one -- confirm against the model card.
    # Generation is capped at 200 tokens and stops early at the "</s>" marker.
    output = llm(prompt, max_tokens=200, stop=["</s>"])
    return output["choices"][0]["text"]

# Callback used by the Gradio UI: takes the text-box contents and returns the
# text to display.
def chat(prompt):
    """Forward ``prompt`` to ``generate_response`` and return its result."""
    reply = generate_response(prompt)
    return reply

# Build the Gradio UI: one text input, one text output, backed by `chat`.
# `live` is intentionally left at its default (off) so the model runs only
# when the user submits the prompt. With live=True Gradio re-invokes `chat` on
# every change to the input, which would start a full LLM generation for each
# partially typed prompt.
iface = gr.Interface(fn=chat, inputs="text", outputs="text")
iface.launch()