santuchal committed
Commit bbb1205
1 Parent(s): 04b293b

Create app.py

Files changed (1)
app.py +25 -0
app.py ADDED
@@ -0,0 +1,25 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ device = "cuda"  # or "cpu"
+ model_path = "ibm-granite/granite-8b-code-instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ # drop device_map if running on CPU
+ model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
+ model.eval()
+ # change input text as desired
+ chat = [
+     { "role": "user", "content": "Write a code to find the maximum value in a list of numbers." },
+ ]
+ chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+ # tokenize the text
+ input_tokens = tokenizer(chat, return_tensors="pt")
+ # transfer tokenized inputs to the device
+ for i in input_tokens:
+     input_tokens[i] = input_tokens[i].to(device)
+ # generate output tokens
+ output = model.generate(**input_tokens, max_new_tokens=100)
+ # decode output tokens into text
+ output = tokenizer.batch_decode(output)
+ # loop over the batch to print, in this example the batch size is 1
+ for i in output:
+     print(i)
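
For reference, a minimal sketch of running the same script on a CPU-only machine (an assumption, not part of the commit): the device_map argument is dropped as the in-file comment suggests, and skip_special_tokens=True is added so the chat-template control tokens are stripped from the printed output.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "ibm-granite/granite-8b-code-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# No device_map argument: weights are loaded on the CPU.
model = AutoModelForCausalLM.from_pretrained(model_path)
model.eval()

chat = [
    {"role": "user", "content": "Write a code to find the maximum value in a list of numbers."},
]
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
input_tokens = tokenizer(prompt, return_tensors="pt")

# torch.no_grad() avoids building autograd state during generation.
with torch.no_grad():
    output = model.generate(**input_tokens, max_new_tokens=100)

# skip_special_tokens=True drops chat-template markers from the decoded text.
print(tokenizer.batch_decode(output, skip_special_tokens=True)[0])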