import os

import gradio as gr
from huggingface_hub import InferenceClient
# Llama-2 chat in Transformers format; the "-hf" variant is the repo served
# by the Hugging Face Inference API. The model is gated, so the token must
# belong to an account that has accepted Meta's license.
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"

# Read the API token from the Space secret / environment variable.
HF_TOKEN = os.getenv("API_TOKEN_2")

def query_model(prompt):
    """Send a single prompt to the hosted model and return its reply."""
    if not prompt or not prompt.strip():
        return "Please enter a prompt."
    try:
        client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
        # Wrap the user prompt in the Llama-2 chat template.
        formatted_prompt = f"<s>[INST] {prompt.strip()} [/INST]"
        response = client.text_generation(
            formatted_prompt,
            max_new_tokens=300,
            temperature=0.6,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            return_full_text=False,
        )
        return response if response else "(No response from model.)"
    except Exception as e:
        return (
            f"Error: {e}\n\n"
            "This can happen if the model is gated, requires a Hugging Face "
            "token, or you need to accept its terms of use on the Hugging "
            "Face website."
        )
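

# Optional streaming variant: a minimal sketch assuming the same model and
# token, not wired into the Interface below. InferenceClient.text_generation
# supports stream=True, which yields tokens as they are generated; returning
# a generator lets Gradio render the partial text incrementally. To try it,
# swap fn=query_model for fn=query_model_stream.
def query_model_stream(prompt):
    if not prompt or not prompt.strip():
        yield "Please enter a prompt."
        return
    client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
    text = ""
    for token in client.text_generation(
        f"<s>[INST] {prompt.strip()} [/INST]",
        max_new_tokens=300,
        temperature=0.6,
        top_p=0.9,
        stream=True,
    ):
        text += token
        yield text
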
gr.Interface(
    fn=query_model,
    inputs=gr.Textbox(lines=4, label="Enter your prompt:"),
    outputs=gr.Textbox(lines=10, label="Model Response"),
    title="Simple Llama-2-7B-Chat Demo",
    description=f"Enter a prompt and get a response from {MODEL_NAME}.",
).launch()
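
# Deployment notes (assumptions about this Space's setup, not verified here):
# - Add a Space secret named API_TOKEN_2 containing a Hugging Face token
#   whose account has been granted access to the gated Llama-2 repo.
# - To run locally: export API_TOKEN_2=hf_... && python app.py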