Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -6,22 +6,21 @@ import numpy as np
 from torch.nn import functional as F
 import os
 from threading import Thread
-
+import spaces
 
-
+token = os.environ["HF_TOKEN"]
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it",
+                                             # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                                             torch_dtype=torch.float16,
+                                             token=token)
 tok = AutoTokenizer.from_pretrained("google/gemma-2b-it",token=token)
 # using CUDA for an optimal experience
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device('cuda')
 model = model.to(device)
 
 
-
-
-def user(message, history):
-    # Append the user's message to the conversation history
-    return "", history + [[message, ""]]
-
-
+@spaces.GPU
 def chat(message, history):
     chat = []
     for item in history:
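The hunk ends mid-function. For context, a minimal sketch of how a ZeroGPU chat handler in this style typically continues, assuming the standard transformers TextIteratorStreamer streaming recipe; everything below `for item in history:` (the loop body, the apply_chat_template call, and the generation settings) is an illustrative assumption, not taken from this commit.

import os
from threading import Thread

import spaces
import torch
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          TextIteratorStreamer)

token = os.environ["HF_TOKEN"]
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it",
                                             torch_dtype=torch.float16,
                                             token=token)
tok = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=token)
device = torch.device('cuda')
model = model.to(device)

@spaces.GPU
def chat(message, history):
    # Rebuild the conversation in the role/content form expected by
    # apply_chat_template (Gemma's template uses only user/assistant roles).
    chat = []
    for item in history:
        chat.append({"role": "user", "content": item[0]})
        if item[1]:
            chat.append({"role": "assistant", "content": item[1]})
    chat.append({"role": "user", "content": message})
    prompt = tok.apply_chat_template(chat, tokenize=False,
                                     add_generation_prompt=True)
    inputs = tok([prompt], return_tensors="pt").to(device)

    # Stream tokens back to the UI while generate() runs in a worker thread.
    streamer = TextIteratorStreamer(tok, timeout=10.0, skip_prompt=True,
                                    skip_special_tokens=True)
    kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024,
                  do_sample=True, temperature=0.9)  # illustrative settings
    Thread(target=model.generate, kwargs=kwargs).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial

One note on the device change: on ZeroGPU hardware the spaces package must be imported before any CUDA initialization, and @spaces.GPU attaches a GPU only for the duration of each decorated call, which is presumably why the commit drops the CPU fallback and targets 'cuda' unconditionally.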