Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
# Model path - use the actual Hugging Face model ID or a local path.
# This value is passed to both from_pretrained() calls in load_model().
# NOTE(review): the repository name ends in "_gguf"; confirm that
# from_pretrained() can load this checkpoint without extra arguments.
MODEL_PATH = "TOOTLE/Gemma_instruct_model_gguf"  # or your local model path

# Alpaca-style prompt template. The single "{}" placeholder is filled with the
# user's question via alpaca_prompt.format(prompt) in chatbot_response().
# NOTE(review): blank lines between the sections may have been lost in the
# reviewed copy; verify the layout against the template used for fine-tuning.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
You are a software engineering expert and your job is help your junior solve coding problems.
### Input:
{}
### Response:
"""
def load_model():
    """Load the tokenizer and the causal language model named by ``MODEL_PATH``.

    Returns:
        tuple: ``(model, tokenizer)``, in that order.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

    model_options = {
        # Use float16 to save memory.
        "torch_dtype": torch.float16,
        "device_map": "auto",
        # Folder used for offloading weights.
        "offload_folder": "offload",
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **model_options)

    return model, tokenizer
def chatbot_response(prompt):
    """Generate the model's answer to a user question.

    The question is inserted into the module-level ``alpaca_prompt`` template,
    tokenized with the module-level ``tokenizer`` and fed to the module-level
    ``model``. Only the tokens produced after the prompt are decoded, so the
    answer is never polluted by the echoed template or by a literal
    "### Response:" marker typed by the user.

    Args:
        prompt: The user's question as entered in the UI.

    Returns:
        str: The generated answer with surrounding whitespace removed, or an
        empty string when ``prompt`` is empty or contains only whitespace.
    """
    if not prompt or not prompt.strip():
        # Nothing to answer: skip an expensive generation pass.
        return ""

    # Move the encoded inputs to the model's device; with device_map="auto"
    # the model is not guaranteed to live on the CPU.
    inputs = tokenizer(
        alpaca_prompt.format(prompt),
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(model.device)

    # Unpack the whole encoding so the attention mask is forwarded together
    # with the input ids.
    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        temperature=0.7,
        do_sample=True,
    )

    # The returned sequence starts with the prompt tokens; slice them off and
    # decode only what the model generated.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
# Load the model and tokenizer once at start-up; chatbot_response() reads both
# as module-level globals.
model, tokenizer = load_model()

# Gradio interface
# NOTE(review): indentation was lost in the reviewed copy. The nesting below
# (both text boxes inside the row, button underneath) is a reconstruction;
# confirm it against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(value="# 💬 Chat with Gemma Model")

    with gr.Row():
        input_text = gr.Textbox(
            label="Ask your question:",
            placeholder="Example: Code in python a function that perform the addition of two float numbers...",
        )
        output_text = gr.Textbox(label="Model response:")

    submit_button = gr.Button(value="Send")
    # Clicking the button sends the question to the model and shows its reply.
    submit_button.click(
        fn=chatbot_response,
        inputs=input_text,
        outputs=output_text,
    )

if __name__ == "__main__":
    demo.launch()