Upload inference.py with huggingface_hub
Browse files- inference.py +49 -0
inference.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
# Alpaca-style prompt template. The three ``{}`` slots are filled, in order,
# with the instruction, the input, and the response (left empty at inference
# time so the model writes it).
_ALPACA_PROMPT = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Default instruction placed in the "### Instruction:" slot of the prompt.
_DEFAULT_INSTRUCTION = ("You are an AI tasked with creating roleplaying datasets. In response to the user's input, create one responses while in character as Adam. Do not write dialog or actions for user in the response. Keep each response to one paragraph. Use asterisk action to describe actions or mental activities.")

# Default sample conversation placed in the "### Input:" slot of the prompt.
_DEFAULT_USER_INPUT = ("\"Whos with you?\n\"my friends\"### Conversation: Protective boyfriend (Adam): \"Where did you go?\" *he said with a cold tone. Me: Amm I Protective boyfriend (Adam): \"At my place. Studying. Alone.\" *He stated firmly, his arms crossed over his chest, eyes narrowing slightly, as if daring you to contradict him.* \"So who was with you, then? Tell me the truth.\" Me: My friends Protective boyfriend (Adam): *Adam's jaw tightened as he processed your response. He could tell you were hesitant, and it only fueled his suspicion.* \"You've already said that, but who are these friends exactly?\" * Me: Jake, Kai, Emily, rose and others.")


def load_model(
    model_path,
    *,
    instruction=_DEFAULT_INSTRUCTION,
    user_input=_DEFAULT_USER_INPUT,
    max_new_tokens=128,
):
    """Load a model with unsloth and stream one generated response to stdout.

    The model is loaded in 4-bit with a 4096-token maximum sequence length,
    switched to unsloth's inference mode, and prompted once with the Alpaca
    template built from ``instruction`` and ``user_input``. Generated text is
    printed as it is produced via ``transformers.TextStreamer``.

    Args:
        model_path: Name or path of the model to load (the model used for
            training); converted with ``str()`` before being passed to unsloth.
        instruction: Text for the "### Instruction:" section of the prompt.
        user_input: Text for the "### Input:" section of the prompt.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        None. The generation is only streamed to stdout.
    """
    # Imported lazily so that importing this module (e.g. for ``--help``)
    # does not require the heavy GPU stack. unsloth is imported before
    # transformers, matching the order used by the original script.
    from unsloth import FastLanguageModel
    from transformers import TextStreamer

    # Announce the load *before* it starts so the message reflects progress.
    print(f"loading model from {model_path}")

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=str(model_path),  # the model you used for training
        max_seq_length=4096,
        dtype=None,  # NOTE(review): presumably lets unsloth auto-select the dtype
        load_in_4bit=True,
    )
    FastLanguageModel.for_inference(model)  # enable native 2x faster inference

    # Response slot is left empty so the model generates it.
    prompt = _ALPACA_PROMPT.format(instruction, user_input, "")
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    text_streamer = TextStreamer(tokenizer)
    # The returned token ids are not needed; the streamer prints the text.
    model.generate(**inputs, streamer=text_streamer, max_new_tokens=max_new_tokens)
|
39 |
+
|
40 |
+
def main(argv=None):
    """Parse command-line arguments and run inference with the chosen model.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), ``argparse`` reads ``sys.argv[1:]``, which is the
            behaviour of the original zero-argument ``main()``.

    Raises:
        SystemExit: Raised by ``argparse`` when the required ``-m/--model``
            option is missing or the arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(description="Inference Script")
    parser.add_argument(
        "-m",
        "--model",
        type=str,
        required=True,
        help="path to the model",
    )

    args = parser.parse_args(argv)

    load_model(args.model)
|
47 |
+
|
48 |
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|