tiansz commited on
Commit
164d3c9
·
1 Parent(s): 774424e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +27 -0
README.md CHANGED
@@ -59,6 +59,33 @@ result = answer("你好")
59
  print(result)
60
  ```
61
 
62
  <br>
63
 
64
  ## License
 
59
  print(result)
60
  ```
61
 
62
+ <br>
63
+ int8:
64
+
65
+ ```python
66
+ from transformers import LlamaForCausalLM, AutoTokenizer
67
+ import torch
68
+
69
+ ckpt = "tiansz/ChatYuan-7B-merge"
70
+ device = torch.device('cuda')
71
+ max_memory = f'{int(torch.cuda.mem_get_info()[0]/1024**3)-1}GB'
72
+ n_gpus = torch.cuda.device_count()
73
+ max_memory = {i: max_memory for i in range(n_gpus)}
74
+ model = LlamaForCausalLM.from_pretrained(ckpt, device_map='auto', load_in_8bit=True, max_memory=max_memory)
75
+ tokenizer = AutoTokenizer.from_pretrained(ckpt)
76
+
77
+ def answer(prompt):
78
+ prompt = f"用户:{prompt}\n小元:"
79
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
80
+ generate_ids = model.generate(input_ids, max_new_tokens=1024, do_sample = True, temperature = 0.7)
81
+ output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
82
+ response = output[len(prompt):]
83
+ return response
84
+
85
+ result = answer("你好")
86
+ print(result)
87
+ ```
88
+
89
  <br>
90
 
91
  ## License