inference code
Browse files
README.md
CHANGED
@@ -1,3 +1,34 @@
|
|
1 |
-
---
|
2 |
-
license: mit
|
3 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
---
license: mit
---

"""Inference script: answer a Bengali question with a fine-tuned Llama-3 model.

Loads the 4-bit Unsloth checkpoint, formats the question with an Alpaca-style
prompt template, and streams the generated answer to stdout.
"""
from transformers import TextStreamer
from unsloth import FastLanguageModel
import torch

# Alpaca-style template: instruction, optional input, empty response slot
# that the model fills in during generation.
alpaca_prompt = """
### Instruction:
{}

### Input:
{}

### Response:
{}"""

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Bikas0/Bengali-Question-Answer-Llama3",  # the model used for training
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)  # enable native 2x faster inference

# Fall back to CPU when no GPU is present instead of crashing on .to("cuda").
device = "cuda" if torch.cuda.is_available() else "cpu"

inputs = tokenizer(
    [
        alpaca_prompt.format(
            "Please provide a detailed answer to the following question",  # instruction
            "বাংলা একাডেমি আইন কোন কারণে সদস্যপদ বাতিল করা হবে ?",  # input
            "",  # output - leave this blank for generation!
        )
    ],
    return_tensors="pt",
).to(device)

# Stream decoded tokens to stdout as they are produced.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=2048)