NetoAI committed on
Commit
ba1befb
1 Parent(s): 2332f87

Upload inference.py

Browse files
Files changed (1) hide show
  1. inference.py +33 -0
inference.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Run a single-turn chat completion with a locally saved causal LM.

Loads the tokenizer and model from the current directory, wraps one
hard-coded user question in chat-template markers, generates a reply,
and prints it to stdout.
"""
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load tokenizer and model from the current directory (local checkpoint).
tokenizer = AutoTokenizer.from_pretrained(".", use_auth_token=None)
model = AutoModelForCausalLM.from_pretrained(".", use_auth_token=None)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# BUG FIX: the original moved only the *inputs* to `device`; on a CUDA
# machine the model weights stayed on CPU and generate() failed with a
# device-mismatch RuntimeError. Move the model to the same device, and
# switch to eval mode since this script only does inference.
model.to(device)
model.eval()

# Example text input
text_input = "How QOS is applied on routers?"

# Chat-template markers (<|system|>/<|user|>/<|assistant|>/<|end|>) —
# NOTE(review): looks like the Phi-3 chat format; confirm against the
# checkpoint's tokenizer_config / chat template.
p = """
<|system|>
You are a helpful assistant.<|end|>
<|user|>""" + text_input + """<|end|>
<|assistant|>
"""

# Tokenize and move input tensors to the same device as the model.
inputs = tokenizer(p, return_tensors="pt")
inputs = inputs.to(device)

print("User Query: " + text_input)
# Generate on the device; no_grad avoids building an autograd graph
# (and its memory cost) during inference.
with torch.no_grad():
    outputs = model.generate(**inputs, max_length=2000, num_return_sequences=1)

print("Model Response: ")
# Decode each generated sequence, stripping the special template tokens.
for output in outputs:
    generated_text = tokenizer.decode(output, skip_special_tokens=True)
    print(generated_text)