drod75 committed on
Commit
9213c90
·
verified ·
1 Parent(s): fae4998

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
# --- Model setup ---
# Gemma-2 2B instruction-tuned checkpoint, loaded in bfloat16 with the
# device placement delegated to `device_map="auto"`.
model_id = "google/gemma-2-2b-it"
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=dtype)
 
22
def poet(text):
    """Ask the model for 25 lines of Shakespeare-style verse about *text*.

    Returns the raw decoded generation (special tokens included).
    """
    # Build the user instruction and render it through the model's chat template.
    user_message = 'Make 25 lines, it has to be absolutely 25 lines of text no less no exception, of shakespeare based on this prompt: ' + text
    rendered = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_message}],
        tokenize=False,
        add_generation_prompt=True,
    )
    # Tokenize without extra special tokens (the template already added them).
    token_ids = tokenizer.encode(rendered, add_special_tokens=False, return_tensors="pt")
    generated = model.generate(input_ids=token_ids.to(model.device), max_new_tokens=350)
    return tokenizer.decode(generated[0])
29
 
 
# --- Model setup ---
model_id = "google/gemma-2-2b-it"
dtype = torch.bfloat16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Pass the configured dtype explicitly: previously `dtype` was defined but
# never used, so the model silently loaded in full float32.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype)
model.to(device)
model.eval()  # inference mode: disable dropout etc.
 
20
 
21
def poet(text):
    """Ask the model for 25 lines of Shakespeare-style verse about *text*.

    Returns the raw decoded generation (special tokens included).
    """
    prompt = 'Make 25 lines, it has to be absolutely 25 lines of text no less no exception, of shakespeare based on this prompt: ' + text
    chat = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    # Single device transfer (the original moved the tensor to the device
    # twice: once here and again via `.to(model.device)` inside generate).
    inputs = inputs.to(device)
    # No gradients needed for generation — skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(input_ids=inputs, max_new_tokens=350)
    return tokenizer.decode(outputs[0])
29