import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the HelpingAI2.5-2B model
model = AutoModelForCausalLM.from_pretrained("OEvortex/HelpingAI2.5-2B")
tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI2.5-2B")
# Move model to GPU (if available) or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Define the function for generating responses
def generate_response(user_input):
    # Define the chat input structure
    chat = [
        {"role": "system", "content": "You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style."},
        {"role": "user", "content": user_input},
    ]
    # Flatten the chat messages into a single "role: content" prompt string
    chat_input = ""
    for message in chat:
        role = message["role"]
        content = message["content"]
        chat_input += f"{role}: {content}\n"
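    # Note (assumption, not verified for this checkpoint): if the tokenizer ships a
    # chat template, the prompt could instead be built with it, e.g.:
    # chat_input = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)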
    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(chat_input, return_tensors="pt").to(device)
    # Generate a response with sampling
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=256,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # Strip the prompt tokens and decode only the newly generated text
    response = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)
# Create the Gradio interface
iface = gr.Interface(
fn=generate_response,
inputs="text",
outputs="text",
live=True
)
# Launch the Gradio app
iface.launch()