# Web-page residue (not part of the program), kept as comments so the file parses:
# TOOTLE's picture
# Update app.py
# be0f89c verified
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Model path - use the actual Hugging Face model ID or local path
# NOTE(review): the repository name ends in "_gguf" — confirm it contains
# weights that AutoModelForCausalLM.from_pretrained can load as called in
# load_model (no extra arguments are passed there).
MODEL_PATH = "TOOTLE/Gemma_instruct_model_gguf" # or your local model path
# Alpaca-style prompt template. The single "{}" placeholder is filled with the
# user's question via str.format in chatbot_response, and the text after the
# trailing "### Response:" marker is what the model is expected to continue.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
You are a software engineering expert and your job is help your junior solve coding problems.
### Input:
{}
### Response:
"""
def load_model():
    """Load the tokenizer and causal language model stored at ``MODEL_PATH``.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    loading_options = {
        "torch_dtype": torch.float16,  # float16 to save memory
        "device_map": "auto",
        "offload_folder": "offload",  # folder used for offloading weights
    }
    tok = AutoTokenizer.from_pretrained(MODEL_PATH)
    lm = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **loading_options)
    return lm, tok
def chatbot_response(prompt):
    """Generate the model's answer to a user question.

    The question is inserted into ``alpaca_prompt``, tokenized with the
    module-level ``tokenizer`` and passed to the module-level ``model``.
    Only the text produced by the model is returned; the prompt itself is
    never part of the result.

    Args:
        prompt: The user's question as plain text.

    Returns:
        The generated answer with surrounding whitespace removed.
    """
    response_marker = "### Response:"

    # NOTE(review): the 512-token cap applies to the whole formatted prompt.
    # Presumably truncation removes tokens from the end, so a very long
    # question could lose the trailing "### Response:" marker — confirm, and
    # consider truncating the user text before formatting instead.
    inputs = tokenizer(
        alpaca_prompt.format(prompt),
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(model.device)  # the model is loaded with device_map="auto"
    input_ids = inputs["input_ids"]

    outputs = model.generate(
        input_ids=input_ids,
        # Pass the mask explicitly so generation does not have to guess it.
        attention_mask=inputs.get("attention_mask"),
        max_new_tokens=1024,
        temperature=0.7,
        do_sample=True,
    )

    # The returned sequence starts with the prompt tokens; slice them off
    # rather than searching the decoded text for the marker, which fails
    # whenever the marker was truncated away.
    prompt_length = input_ids.shape[-1]
    completion = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )

    # If the model repeats the marker in its own output, keep only what
    # follows the last occurrence, as the original extraction did.
    return completion.rpartition(response_marker)[2].strip()
# Load model and tokenizer once at import time; chatbot_response reads these
# module-level names on every call.
model, tokenizer = load_model()
# Gradio interface: one question box, one answer box, and a button that runs
# chatbot_response on the question and shows the result.
# NOTE(review): the nesting below (both text boxes inside the row, button
# underneath) is assumed — confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 💬 Chat with Gemma Model")

    with gr.Row():
        input_text = gr.Textbox(
            placeholder="Example: Code in python a function that perform the addition of two float numbers...",
            label="Ask your question:",
        )
        output_text = gr.Textbox(label="Model response:")

    submit_button = gr.Button("Send")
    submit_button.click(
        fn=chatbot_response,
        inputs=input_text,
        outputs=output_text,
    )
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()