FiendHunter commited on
Commit
6b4591d
·
verified ·
1 Parent(s): 70eff70

Inference script weights updated and Stack Exchange data added

Browse files
Files changed (3) hide show
  1. bitcoin_stackexchange.zip +3 -0
  2. checkpoint-1875.zip +3 -0
  3. infer.py +46 -0
bitcoin_stackexchange.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2474bbc7a9e6a7cf46d94025d6aaa96c4c8a3c83eb04cd0edb1db79c96b307d5
3
+ size 89022159
checkpoint-1875.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45206d48ce551acd137fac3370e458bfc54c661f6a1cd940a76552ec5c9e29bf
3
+ size 463297422
infer.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Interactive inference with a LoRA-adapted Llama-3.1-8B-Instruct model.

Loads the base model in bfloat16, attaches the fine-tuned LoRA adapter from
``ADAPTER_PATH``, and runs a simple terminal REPL that streams generated
tokens to stdout. Type ``exit`` or ``quit`` to leave the loop.
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel

BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
ADAPTER_PATH = "checkpoint-1875"


def _load_model():
    """Load tokenizer, base model, and LoRA adapter; return (tokenizer, model)."""
    print("Loading base model...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    print("Loading LoRA adapter...")
    # PeftModel.from_pretrained reads the adapter config from ADAPTER_PATH
    # itself, so the original's separate (discarded) PeftConfig.from_pretrained
    # call was redundant and has been dropped.
    model = PeftModel.from_pretrained(model, ADAPTER_PATH)
    model.set_adapter("default")
    model.eval()  # inference only: disable dropout etc.
    return tokenizer, model


def main():
    tokenizer, model = _load_model()
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    print("\nModel ready. Type your message below (type 'exit' to quit):")
    while True:
        user_input = input("\nYou: ")
        if user_input.lower() in {"exit", "quit"}:
            print("Exiting...")
            break

        # BUG FIX: the original hand-built a Mistral-style "[INST] ... [/INST]"
        # prompt, which is the wrong template for Llama-3.1-Instruct. Use the
        # tokenizer's own chat template so the model sees the format it was
        # trained (and fine-tuned) on.
        input_ids = tokenizer.apply_chat_template(
            [{"role": "user", "content": user_input}],
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)  # BUG FIX: was hard-coded "cuda"; device_map="auto" may shard/offload

        with torch.no_grad():
            _ = model.generate(
                input_ids,
                max_new_tokens=256,
                do_sample=True,
                top_p=0.95,
                temperature=0.7,
                repetition_penalty=1.2,
                no_repeat_ngram_size=3,
                pad_token_id=tokenizer.eos_token_id,  # silence missing-pad-token warning
                # The original encoded "Comments-" into a never-used variable
                # named stopping_criteria; realize that apparent intent via
                # stop_strings (requires transformers >= 4.41 — TODO confirm).
                stop_strings=["Comments-"],
                tokenizer=tokenizer,
                streamer=streamer,
            )


if __name__ == "__main__":
    main()