efederici committed on
Commit
5e3a4bc
1 Parent(s): cbf7017

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import (
4
+ AutoModelForCausalLM,
5
+ AutoTokenizer,
6
+ TextIteratorStreamer,
7
+ )
8
+ import os
9
+ from threading import Thread
10
+ import spaces
11
+ import time
12
+ import subprocess
13
+
14
# HTML snippet handed to the Chatbot component as its `placeholder` content.
PLACEHOLDER = """
<div style="padding: 40px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<img src="https://i.imgur.com/dgSNbTl.jpg" style="width: 90%; max-width: 650px; height: auto; opacity: 0.8; ">
<h1 style="font-size: 28px; margin-top: 20px; margin-bottom: 2px; opacity: 0.55;">mii-llm / Maestrale</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Chiedi pure...</p>
</div>
"""

# HTML blurb handed to the ChatInterface as its `description`.
# NOTE(review): the text and links below name "v0.3 beta", while the checkpoint
# loaded by this script is "mii-llm/maestrale-chat-v0.4-alpha" — confirm which
# version the page is supposed to advertise.
DESCRIPTION = """<div>
<p>🇮🇹 Italian LLM <a href="https://huggingface.co/mii-llm/maestrale-chat-v0.3-beta"><b>Maestrale Chat v0.3 beta</b></a>. Maestrale is a powerful language model for Italian, trained by mii-llm, based on Mistral 7B.</p>
<p>🔎 For more details about the Maestrale model and how to use it with <code>transformers</code>, visit the <a href="https://huggingface.co/mii-llm/maestrale-chat-v0.3-beta">model card</a>.</p>
</div>"""
26
+
27
# Hub repository that supplies both the tokenizer and the causal-LM weights.
_MODEL_REPO = "mii-llm/maestrale-chat-v0.4-alpha"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_REPO)
model = AutoModelForCausalLM.from_pretrained(_MODEL_REPO, device_map="auto")

# Token ids passed to generate() as `eos_token_id`: the tokenizer's own EOS
# plus the id of the "<|im_end|>" token.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|im_end|>"),
]

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
_cuda_ready = torch.cuda.is_available()
device = torch.device("cuda" if _cuda_ready else "cpu")
print(
    f"Using GPU: {torch.cuda.get_device_name(device)}"
    if _cuda_ready
    else "Using CPU"
)

# NOTE(review): the model was already loaded with device_map="auto"; this
# explicit move is kept as-is from the original — confirm it is still wanted.
model = model.to(device)
44
+
45
+
46
def _build_conversation(message, history, system):
    """Return the role/content message list for one chat request.

    Args:
        message: The new user message.
        history: Iterable of ``(user, assistant)`` pairs from earlier turns.
        system: Optional system prompt; omitted from the list when falsy.
    """
    conversation = [{"role": "system", "content": system}] if system else []
    for user_turn, assistant_turn in history:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})
    return conversation


def _decoding_options(temperature, do_sample):
    """Return the decoding-strategy kwargs for ``model.generate``.

    Sampling needs a strictly positive temperature, so a request for sampling
    with a zero (or missing) temperature is served with greedy decoding
    instead of being forwarded and failing inside the worker thread.
    """
    if do_sample and temperature and temperature > 0:
        return {"do_sample": True, "temperature": float(temperature)}
    # Greedy decoding: leave `temperature` out, it only applies when sampling.
    return {"do_sample": False}


@spaces.GPU(duration=60)
def chat(message, history, system, temperature, do_sample, max_tokens):
    """Stream the model's reply to ``message``.

    Args:
        message: The new user message.
        history: Iterable of ``(user, assistant)`` pairs from earlier turns.
        system: Optional system prompt.
        temperature: Sampling temperature; values <= 0 select greedy decoding.
        do_sample: Whether to sample instead of decoding greedily.
        max_tokens: Upper bound on newly generated tokens.

    Yields:
        The reply text accumulated so far, once per streamed chunk, and one
        final time after the stream ends.
    """
    conversation = _build_conversation(message, history, system)

    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = {
        **model_inputs,
        "streamer": streamer,
        # UI sliders may deliver floats; generate() expects an integer count.
        "max_new_tokens": int(max_tokens),
        "eos_token_id": terminators,
        **_decoding_options(temperature, do_sample),
    }

    # generate() blocks until completion, so it runs in a worker thread while
    # this generator drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # The stream has ended normally; wait for the worker to finish before
    # this function returns.
    worker.join()

    yield partial_text
81
+
82
+
83
# Conversation pane; PLACEHOLDER is supplied as its placeholder content.
chatbot = gr.Chatbot(height=550, placeholder=PLACEHOLDER, label='Conversazione')

# Pieces of the interface, created in the same order as before and named so
# the ChatInterface call below stays short.
soft_theme = gr.themes.Soft()
params_accordion = gr.Accordion(label="⚙️ Parametri", open=False, render=False)

# Extra controls; their order mirrors chat()'s trailing parameters
# (system, temperature, do_sample, max_tokens).
system_box = gr.Textbox(label="System", value="sei un assistente utile.")
temperature_slider = gr.Slider(
    minimum=0, maximum=1, step=0.1, value=0.7, label="Temperature", render=False
)
sampling_toggle = gr.Checkbox(label="Sampling", value=True)
max_tokens_slider = gr.Slider(
    minimum=128,
    maximum=4096,
    step=1,
    value=512,
    label="Max new tokens",
    render=False,
)

demo = gr.ChatInterface(
    fn=chat,
    chatbot=chatbot,
    fill_height=True,
    theme=soft_theme,
    additional_inputs_accordion=params_accordion,
    additional_inputs=[
        system_box,
        temperature_slider,
        sampling_toggle,
        max_tokens_slider,
    ],
    stop_btn="Stop Generation",
    cache_examples=False,
    title="Maestrale Chat v0.3 beta",
    description=DESCRIPTION,
)

demo.launch()