yibolu96 committed on
Commit
9d60792
1 Parent(s): 9ba46d2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -4
README.md CHANGED
@@ -56,17 +56,20 @@ docker run --gpus=1 --rm --net=host -v ${PWD}:/workdir yibolu96/lyra-chatglm-env
56
 
57
  ```python
58
  from transformers import AutoTokenizer
59
- from faster_chat_glm import GLM6B, FasterChatGLM
 
60
 
 
61
 
62
  MAX_OUT_LEN = 100
63
- tokenizer = AutoTokenizer.from_pretrained('./models', trust_remote_code=True)
 
64
  input_str = ["为什么我们需要对深度学习模型加速?", ]
65
  inputs = tokenizer(input_str, return_tensors="pt", padding=True)
66
  input_ids = inputs.input_ids.to('cuda:0')
67
 
 
68
 
69
- plan_path = './models/glm6b-bs8.ftm'
70
  # kernel for chat model.
71
  kernel = GLM6B(plan_path=plan_path,
72
  batch_size=1,
@@ -78,7 +81,7 @@ kernel = GLM6B(plan_path=plan_path,
78
  vocab_size=150528,
79
  max_seq_len=MAX_OUT_LEN)
80
 
81
- chat = FasterChatGLM(model_dir="./models", kernel=kernel).half().cuda()
82
 
83
  # generate
84
  sample_output = chat.generate(inputs=input_ids, max_length=MAX_OUT_LEN)
 
56
 
57
  ```python
58
  from transformers import AutoTokenizer
59
+ from lyraChatGLM import GLM6B, FasterChatGLM
60
+ import os
61
 
62
+ current_workdir = os.path.dirname(__file__)
63
 
64
  MAX_OUT_LEN = 100
65
+ chatglm6b_dir = os.path.join(current_workdir, "models")
66
+ tokenizer = AutoTokenizer.from_pretrained(chatglm6b_dir, trust_remote_code=True)
67
  input_str = ["为什么我们需要对深度学习模型加速?", ]
68
  inputs = tokenizer(input_str, return_tensors="pt", padding=True)
69
  input_ids = inputs.input_ids.to('cuda:0')
70
 
71
+ plan_path = os.path.join(current_workdir, "models/glm6b-bs8.ftm")
72
 
 
73
  # kernel for chat model.
74
  kernel = GLM6B(plan_path=plan_path,
75
  batch_size=1,
 
81
  vocab_size=150528,
82
  max_seq_len=MAX_OUT_LEN)
83
 
84
+ chat = FasterChatGLM(model_dir=chatglm6b_dir, kernel=kernel).half().cuda()
85
 
86
  # generate
87
  sample_output = chat.generate(inputs=input_ids, max_length=MAX_OUT_LEN)