# app.py -- commit 4c16e63 ("Create app.py", verified), author: vijayvizag, 561 Bytes
import gradio as gr
from llama_cpp import Llama
# Load the GGUF model once at import time so every request reuses the same
# instance. The path is relative to the process working directory.
MODEL_PATH = "./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
CONTEXT_SIZE = 2048  # passed as n_ctx
llm = Llama(model_path=MODEL_PATH, n_ctx=CONTEXT_SIZE)
# Function to generate response
def generate_response(prompt):
    """Return the model's completion for ``prompt``.

    Args:
        prompt: Text handed verbatim to the module-level ``llm``.

    Returns:
        The ``"text"`` field of the first entry in the completion's
        ``"choices"`` list, or an empty string when ``prompt`` is empty,
        ``None``, or whitespace-only.
    """
    # Nothing to complete: skip inference rather than running the model on
    # an empty prompt.
    if not prompt or not prompt.strip():
        return ""

    # Limit the reply to 200 tokens and stop early at the "</s>" sequence.
    output = llm(prompt, max_tokens=200, stop=["</s>"])
    return output["choices"][0]["text"]
# Gradio interface to interact with the model
def chat(prompt):
    """Callback used by the Gradio interface.

    Forwards ``prompt`` to ``generate_response`` and returns its result
    unchanged.
    """
    reply = generate_response(prompt)
    return reply
# Build the Gradio UI: a single text input and a single text output, both
# wired to chat().
# `live` is deliberately left at its default (off): with live=True Gradio
# re-runs `fn` whenever the input changes, which would start a fresh model
# generation on every edit of the prompt. Inference now runs on submit.
iface = gr.Interface(fn=chat, inputs="text", outputs="text")
iface.launch()