provaAncora / app.py
davnas's picture
Update app.py
b3990cf verified
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
# Load the tokenizer and the model for plain full-precision CPU inference.
model_name = "davnas/Italian_Cousine_2.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# No quantization is wanted on CPU.  A BitsAndBytesConfig with every option
# switched off is not the same as "no quantization": handing one to
# from_pretrained still selects the bitsandbytes loading path, and
# bnb_4bit_quant_type=None is not a valid value for that config.  The name is
# kept at module level (as None) so anything that reads it keeps working.
quantization_config = None

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",  # keep every weight on the CPU
    torch_dtype=torch.float32,  # full precision; no half-precision kernels needed
    quantization_config=quantization_config,
    use_safetensors=True,
    low_cpu_mem_usage=True,
)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply for one chat turn.

    Args:
        message: The newest user message.
        history: Earlier turns of the conversation.  Each entry is either a
            ``(user, assistant)`` pair or a ``{"role": ..., "content": ...}``
            dict; both layouts are accepted.
        system_message: Text sent as the system prompt.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded text of the newly generated tokens (prompt excluded,
        special tokens stripped).
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history or []:
        if isinstance(turn, dict):
            # Role/content dict: copy only the fields the chat template uses.
            messages.append({"role": turn["role"], "content": turn["content"]})
            continue
        user_msg, assistant_msg = turn
        # A pair can hold None on either side (e.g. a turn that never got a
        # reply); such entries carry no text and are left out of the prompt.
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    # return_dict=True yields the attention mask alongside the input ids and
    # makes the return type explicit instead of relying on the default.
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = encoded["input_ids"].shape[-1]

    # Tokenizers without a pad token report None; fall back to EOS.  An
    # explicit None check is used because 0 is a legitimate token id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        output_ids = model.generate(
            **encoded,
            max_new_tokens=int(max_tokens),  # UI sliders may deliver floats
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            pad_token_id=pad_token_id,
        )

    # Drop the prompt tokens so that only the new completion is decoded.
    return tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)
# Extra controls shown with the chat; they are passed to respond() after the
# message and history, in the order listed in additional_inputs below.
_system_message_box = gr.Textbox(
    label="System message",
    value="You are a professional chef assistant who provides accurate and detailed recipes.",
)
_max_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=2048,
    step=1,
    value=512,
)
_temperature_slider = gr.Slider(
    label="Temperature",
    minimum=0.1,
    maximum=4.0,
    step=0.1,
    value=0.7,
)
_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    minimum=0.1,
    maximum=1.0,
    step=0.05,
    value=0.95,
)

# Chat UI wired to respond().
demo = gr.ChatInterface(
    fn=respond,
    title="Italian Cuisine Chatbot",
    description="Ask me anything about Italian cuisine or cooking!",
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)

# Start the web server only when run as a script: all interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)