Spaces: Running on Zero
updates
app.py CHANGED
@@ -6,6 +6,14 @@ import gradio as gr
 import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+import subprocess
+
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
+
 
 DESCRIPTION = """\
 # Gemma 2 9B IT
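The added startup block is the usual ZeroGPU workaround for flash-attn: install the prebuilt wheel at runtime, with FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE so no CUDA compilation is attempted on the build machine. One caveat: passing env= to subprocess.run replaces the child's entire environment rather than extending it; this often works in practice, but copying os.environ is the more robust pattern. A minimal sketch of that variant (not part of this commit):

import os
import subprocess

# Extend, rather than replace, the parent environment.
env = os.environ.copy()
env["FLASH_ATTENTION_SKIP_CUDA_BUILD"] = "TRUE"  # use the prebuilt wheel, skip the CUDA build

subprocess.run(
    ["pip", "install", "flash-attn", "--no-build-isolation"],
    env=env,
    check=True,  # raise CalledProcessError instead of failing silently
)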
@@ -24,11 +32,12 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 model_id = "anakin87/Phi-3.5-mini-ITA"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
     torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
 )
 model.config.sliding_window = 4096
 model.eval()
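trust_remote_code=True lets transformers execute the custom modeling code shipped in the model repository, which Phi-3-family checkpoints have required with some transformers versions. Note that the commit installs flash-attn but never selects it explicitly, so whether it is used depends on the model's own code. A hedged sketch of making the choice explicit, assuming a transformers version that supports the attn_implementation argument:

# Sketch (not in this commit): fall back to eager attention when the
# flash-attn wheel failed to install or import.
try:
    import flash_attn  # noqa: F401
    attn_implementation = "flash_attention_2"
except ImportError:
    attn_implementation = "eager"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    attn_implementation=attn_implementation,
)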
@@ -39,10 +48,10 @@ def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     max_new_tokens: int = 1024,
-    temperature: float = 0.6,
-    top_p: float = 0.9,
+    temperature: float = 0.001,
+    top_p: float = 1.0,
     top_k: int = 50,
-    repetition_penalty: float = 1.2,
+    repetition_penalty: float = 1.0,
 ) -> Iterator[str]:
     conversation = []
     for user, assistant in chat_history:
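The new defaults (temperature=0.001, top_p=1.0, repetition_penalty=1.0) keep the sampling code path but make decoding behave almost greedily, so answers are near-deterministic. For context, a sketch of how these parameters are typically forwarded in this Gradio template, assuming the standard TextIteratorStreamer plus background-thread pattern; input_ids stands in for the tokenized, truncated conversation prepared earlier in generate():

from threading import Thread

# Sketch of the downstream call; kwarg names are the standard
# transformers GenerationMixin.generate parameters.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    input_ids=input_ids,  # assumed: built from the chat template above
    streamer=streamer,
    max_new_tokens=max_new_tokens,
    do_sample=True,       # with temperature ~0 this is effectively greedy
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    repetition_penalty=repetition_penalty,
)
Thread(target=model.generate, kwargs=generate_kwargs).start()
# generate() then iterates over `streamer`, yielding the accumulated text.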
@@ -93,17 +102,17 @@ chat_interface = gr.ChatInterface(
         ),
         gr.Slider(
             label="Temperature",
-            minimum=0.1,
+            minimum=0,
             maximum=4.0,
             step=0.1,
-            value=0.6,
+            value=0.001,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=0.9,
+            value=1.0,
         ),
         gr.Slider(
             label="Top-k",
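Lowering the temperature slider's minimum from 0.1 to 0 lets users drag it all the way down, but transformers rejects temperature=0 when do_sample=True (the temperature warper requires a strictly positive float), which is presumably why the default lands on 0.001 rather than 0. A hypothetical guard that would make a literal 0 safe:

# Hypothetical (not in this commit): treat a zero/near-zero slider value as
# a request for greedy decoding instead of sampling.
do_sample = temperature > 1e-3
if not do_sample:
    temperature = None  # generate() ignores temperature when do_sample=False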
@@ -117,19 +126,20 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.2,
+            value=1.0,
         ),
     ],
     stop_btn=None,
     examples=[
         ["Ciao! Come stai?"],
-        ["
-        ["
-        ["
+        ["Pro e contro di una relazione a lungo termine. Elenco puntato con max 3 pro e 3 contro sintetici."],
+        ["Quante ore impiega un uomo per mangiare un elicottero?"],
+        ["Come si apre un file JSON in Python?"],
+        ["Fammi un elenco puntato dei pro e contro di vivere in Italia. Massimo 2 pro e 2 contro."],
+        ["Inventa una breve storia con animali sul valore dell'amicizia."],
         ["Scrivi un articolo di 100 parole sui 'Benefici dell'open-source nella ricerca sull'intelligenza artificiale'"],
         ["Hello there! How are you doing?"],
         ["Can you explain briefly to me what is the Python programming language?"],
-        ["Explain the plot of Cinderella in a sentence."],
         ["How many hours does it take a man to eat a Helicopter?"],
         ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
|