injilashah committed (verified)
Commit: c944f45
Parent: 25c12ea

Update app.py

Files changed (1): app.py (+6 -6)
app.py CHANGED
@@ -1,16 +1,16 @@
-import os
+#import os
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
-hf_token = os.getenv("hftoken")
+#hf_token = os.getenv("hftoken")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
 b_tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")#using small parameter version of model for faster inference on hf
 b_model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
 
-g_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b",use_auth_token = hf_token)#using small paramerter version of model for faster inference on hf
-g_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b",use_auth_token = hf_token)
+g_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")#using small paramerter version of model for faster inference on hf
+g_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b")
 
 def Sentence_Commpletion(model_name, input):
 
@@ -21,8 +21,8 @@ def Sentence_Commpletion(model_name, input):
         outputs = model.generate(inputss.input_ids, max_length=30, num_return_sequences=1)
     elif model_name == "Gemma":
         tokenizer, model = g_tokenizer, g_model
-        inputss = tokenizer(input, return_tensors="pt").to("cuda")
-        outputs = model.generate(inputss.input_ids, max_new_tokens=20).to("cuda")
+        input_ids = tokenizer(input, return_tensors="pt").to("cuda")
+        outputs = model.generate(**input_ids, max_new_tokens=32).to("cuda")
     return tokenizer.decode(outputs[0])
 
 
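Two notes on these changes, for anyone reading the commit. First, the token handling: google/gemma-2-2b is a gated checkpoint, so loading it with no credentials only works in an environment that is already authenticated (for example via huggingface-cli login). If the token is ever reinstated, the current transformers kwarg is token= rather than the deprecated use_auth_token= that the old lines used. A minimal sketch, assuming the same "hftoken" secret name the original app.py read:

import os
from transformers import AutoTokenizer, AutoModelForCausalLM

# "hftoken" is the secret/env-var name from the original app.py;
# set it in the Space settings or your shell before running.
hf_token = os.getenv("hftoken")

# token= replaces the deprecated use_auth_token= kwarg.
g_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b", token=hf_token)
g_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b", token=hf_token)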
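Second, the generation fix: tokenizer(...) returns a BatchEncoding, and calling .to(...) on it moves input_ids and attention_mask together, which is why unpacking it with **input_ids into generate() is the cleaner call. The trailing .to("cuda") on generate()'s output does nothing useful for decoding and, like the hard-coded "cuda" on the inputs, will raise on CPU-only hardware; the device variable defined at the top of app.py avoids both problems. A runnable sketch of the pattern, using the ungated bigscience/bloom-560m checkpoint from this same file so no token is needed:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# bloom-560m is ungated, so this sketch runs without authentication.
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m").to(device)

def complete(prompt):
    # BatchEncoding.to(device) moves input_ids and attention_mask together.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # **inputs passes the attention_mask along, unlike inputs.input_ids alone.
    outputs = model.generate(**inputs, max_new_tokens=32)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(complete("The weather today is"))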