Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
-
import os
|
2 |
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
import gradio as gr
|
5 |
-
hf_token = os.getenv("hftoken")
|
6 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
7 |
|
8 |
|
9 |
b_tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")#using small parameter version of model for faster inference on hf
|
10 |
b_model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
|
11 |
|
12 |
-
g_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b"
|
13 |
-
g_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b"
|
14 |
|
15 |
def Sentence_Commpletion(model_name, input):
|
16 |
|
@@ -21,8 +21,8 @@ def Sentence_Commpletion(model_name, input):
|
|
21 |
outputs = model.generate(inputss.input_ids, max_length=30, num_return_sequences=1)
|
22 |
elif model_name == "Gemma":
|
23 |
tokenizer, model = g_tokenizer, g_model
|
24 |
-
|
25 |
-
outputs = model.generate(
|
26 |
return tokenizer.decode(outputs[0])
|
27 |
|
28 |
|
|
|
1 |
+
#import os
|
2 |
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
import gradio as gr
|
5 |
+
#hf_token = os.getenv("hftoken")
|
6 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
7 |
|
8 |
|
9 |
b_tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")#using small parameter version of model for faster inference on hf
|
10 |
b_model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
|
11 |
|
12 |
+
g_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")#using small parameter version of model for faster inference on hf
|
13 |
+
g_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b")
|
14 |
|
15 |
def Sentence_Commpletion(model_name, input):
|
16 |
|
|
|
21 |
outputs = model.generate(inputss.input_ids, max_length=30, num_return_sequences=1)
|
22 |
elif model_name == "Gemma":
|
23 |
tokenizer, model = g_tokenizer, g_model
|
24 |
+
input_ids = tokenizer(input, return_tensors="pt").to("cuda")
|
25 |
+
outputs = model.generate(**input_ids, max_new_tokens=32).to("cuda")
|
26 |
return tokenizer.decode(outputs[0])
|
27 |
|
28 |
|