carsonhxsu committed
Commit 53f87ca
Parent: 1c9695e

Update code

README.md CHANGED
@@ -86,7 +86,7 @@ python demo.py
 ```python
 from lyraChatGLM import LyraChatGLM6B
 
-model_path = "./models/1-gpu-fp16.h5"
+model_path = "./models/1-gpu-fp16.bin"
 tokenizer_path = "./models"
 data_type = "fp16"
 int8_mode = 0 # 1 for INT8 WEIGHT ONLY PTQ
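The README change swaps the checkpoint from `1-gpu-fp16.h5` to `1-gpu-fp16.bin`. For users upgrading from the old release, a minimal sketch of a guard that fails fast when only the legacy `.h5` file is present (the check and messages are illustrative, not part of the repo):

```python
import os

model_path = "./models/1-gpu-fp16.bin"  # the renamed .bin checkpoint

# Assumption: an upgrading user may still have the old .h5 file on disk.
if not os.path.exists(model_path):
    legacy = model_path.replace(".bin", ".h5")
    hint = " (found legacy .h5 checkpoint; re-download the .bin weights)" \
        if os.path.exists(legacy) else ""
    raise FileNotFoundError(f"checkpoint not found: {model_path}{hint}")
```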
demo.py CHANGED
@@ -3,13 +3,13 @@ import numpy as np
 
 model_path = "./models/1-gpu-fp16.bin"
 tokenizer_path = "./models"
-data_type = "fp16"
+inference_data_type = "fp16"
 int8_mode = 0
 max_output_length = 150
-arch = "Ampere" # Ampere or Volta
-cuda_version = 12 # cuda version, we currently support 11 and 12
+arch = "Volta" # Ampere or Volta
+cuda_version = 11 # cuda version, we currently support 11 and 12
 
-model = LyraChatGLM6B(model_path, tokenizer_path, data_type, int8_mode, arch, cuda_version)
+model = LyraChatGLM6B(model_path, tokenizer_path, inference_data_type, int8_mode, arch, cuda_version)
 
 prompt = "今天天气大概 25度,有点小雨,吹着风,我想去户外散步,应该穿什么样的衣服裤子鞋子搭配。"
 # test_batch_size = 256
@@ -19,4 +19,4 @@ prompts = [prompt, ]
 # # If you want to get different output in same batch, you can set do_sample to True
 output_texts = model.generate(prompts, output_length=max_output_length, top_k=30, top_p=0.85, temperature=0.35, repetition_penalty=1.2, do_sample=False)
 
-print(output_texts)
+print(output_texts)
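demo.py now hardcodes `arch = "Volta"` and `cuda_version = 11`. If you would rather derive both from the running machine, a hedged sketch using standard PyTorch queries (the sm-to-name mapping is an assumption based on the two supported values, and it presumes a CUDA build of PyTorch):

```python
import torch

# Compute capability major version: 7 -> Volta (sm70), 8+ -> Ampere (sm80).
major, _ = torch.cuda.get_device_capability(0)
arch = "Ampere" if major >= 8 else "Volta"

# torch.version.cuda is a string such as "11.8"; keep the major version only.
cuda_version = int(torch.version.cuda.split(".")[0])
assert cuda_version in (11, 12), "lyraChatGLM currently supports CUDA 11 and 12"
```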
lyraChatGLM/ftlib/libth_transformer_sm70_cu11.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a4a778897f6c5f77b0ea1cb14bb63732da9c3cc4e16ff16d9f911dcc8b6f6be5
-size 114267536
lyraChatGLM/ftlib/libth_transformer_sm70_cu12.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:99ac80b2f4c161bbacbf64a7607f323c612c7c5f26b83eaec7f559425f3a818b
-size 114186112
lyraChatGLM/ftlib/libth_transformer_sm80_cu11.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a1d6cd03321b671275fcabb4136562845233875564047ccde20401fca4df45c2
-size 200834616
lyraChatGLM/ftlib/libth_transformer_sm80_cu12.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2da10aad8e92bcdf45b15884cee63e845f582cd28bcc0f7f1c2a4f6a101e9646
-size 200916960
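The four deleted prebuilt libraries follow a `libth_transformer_sm{70,80}_cu{11,12}.so` naming scheme keyed on GPU architecture and CUDA major version. A sketch of how such a path could be resolved; the helper function and directory layout below are assumptions for illustration, not the repo's actual loader:

```python
import os

def ftlib_path(arch: str, cuda_version: int,
               libdir: str = "lyraChatGLM/ftlib") -> str:
    # Mirror the deleted file names: Volta -> sm70, Ampere -> sm80.
    sm = {"Volta": 70, "Ampere": 80}[arch]
    return os.path.join(libdir, f"libth_transformer_sm{sm}_cu{cuda_version}.so")

# e.g. ftlib_path("Volta", 11)
#   -> "lyraChatGLM/ftlib/libth_transformer_sm70_cu11.so"
```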
lyraChatGLM/lyra_glm.py CHANGED
@@ -134,7 +134,10 @@ class LyraChatGLM6B:
         ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
         ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
 
-        input_token_ids = self.tokenizer(prompts, return_tensors="pt", padding=True).input_ids.int()
+        # input_token_ids = self.tokenizer(prompts, return_tensors="pt", padding=True).input_ids.int()
+        raw_input_token_ids = self.tokenizer(prompts, padding=True)
+        input_token_ids = torch.tensor(raw_input_token_ids["input_ids"], dtype=torch.int32)
+
         input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
         mask_positions = torch.IntTensor([seq.index(130001) for seq in input_token_ids.tolist()])
 
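The rewritten tokenization path drops `return_tensors="pt"` and builds the int32 tensor by hand, which leaves the downstream `input_lengths`/`mask_positions` logic unchanged. A self-contained sketch of that logic with made-up token ids, where 130001 is the mask token id the code searches for:

```python
import torch

# Illustrative padded batch standing in for the tokenizer output.
raw_input_token_ids = {"input_ids": [[5, 6, 130001, 130004],
                                     [7, 130001, 130004, 3]]}

input_token_ids = torch.tensor(raw_input_token_ids["input_ids"], dtype=torch.int32)
input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
mask_positions = torch.IntTensor(
    [seq.index(130001) for seq in input_token_ids.tolist()])

print(mask_positions)  # tensor([2, 1], dtype=torch.int32)
```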
 
lyraChatGLM/model.py CHANGED
@@ -123,6 +123,7 @@ class ChatGLM6BModel(nn.Module):
             self.adapter_inter_size,
             self.use_attention_linear_bias,
             self.model_path,
+            self.weights_data_type,
             inference_data_type,
             self.shared_contexts_ratio)
         self.build_model = True
  self.build_model = True