import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Model path - use the actual Hugging Face model ID or local path
MODEL_PATH = "TOOTLE/Gemma_instruct_model_gguf"  # or your local model path

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a software engineering expert and your job is to help your juniors solve coding problems.

### Input:
{}

### Response:
"""

def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.float16,  # use float16 to halve memory usage
        device_map="auto",
        offload_folder="offload"  # Ajoutez un dossier pour le déchargement des poids
    )
    return model, tokenizer


def chatbot_response(prompt):
    # Build the Alpaca-style prompt and move tensors to the model's device
    inputs = tokenizer(
        alpaca_prompt.format(prompt),
        return_tensors="pt",
        truncation=True,
        max_length=512
    ).to(model.device)
    outputs = model.generate(
        **inputs,  # passes input_ids and attention_mask together
        max_new_tokens=1024,
        temperature=0.7,
        do_sample=True
    )
    # The decoded text contains the full prompt; keep only what follows "### Response:"
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("### Response:")[-1].strip()

# Load model and tokenizer
model, tokenizer = load_model()
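# (done once at startup so every request reuses the same weights)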

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 💬 Chat with Gemma Model")
    
    with gr.Row():
        input_text = gr.Textbox(
            label="Ask your question:",
            placeholder="Example: Code in python a function that perform the addition of two float numbers..."
        )
        output_text = gr.Textbox(label="Model response:")
    
    submit_button = gr.Button("Send")
    submit_button.click(chatbot_response, inputs=input_text, outputs=output_text)

if __name__ == "__main__":
    demo.launch()
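    # Run with `python app.py`; Gradio serves the UI at http://localhost:7860 by default.
    # Pass share=True to demo.launch() for a temporary public link.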