Commit 82834c6 by silver
Parent(s): 220f772

remove image tokens from chatglm-6b

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "THUDM/chatglm-6b",
+  "_name_or_path": "silver/chatglm-6b-slim",
   "architectures": [
     "ChatGLMModel"
   ],
@@ -8,8 +8,8 @@
     "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
     "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration"
   },
-  "bos_token_id": 150004,
-  "eos_token_id": 150005,
+  "bos_token_id": 130004,
+  "eos_token_id": 130005,
   "hidden_size": 4096,
   "inner_hidden_size": 16384,
   "layernorm_epsilon": 1e-05,
@@ -21,5 +21,5 @@
   "torch_dtype": "float16",
   "transformers_version": "4.23.1",
   "use_cache": true,
-  "vocab_size": 150528
+  "vocab_size": 130528
 }
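
Note on the config change: only the repository name, the special-token ids, and the vocabulary size differ. The 20,000 image tokens occupied the first ids of the original vocabulary, so vocab_size drops from 150528 to 130528 and every surviving id shifts down by 20,000. A minimal sketch of that id arithmetic (illustrative only, not shipped with the commit):

# Illustrative sketch of the id shift implied by this config change.
NUM_IMAGE_TOKENS = 20_000
OLD_VOCAB, NEW_VOCAB = 150_528, 130_528

def old_to_slim_id(old_id: int) -> int:
    """Map an original ChatGLM-6B token id to its chatglm-6b-slim id."""
    if old_id < NUM_IMAGE_TOKENS:
        raise ValueError(f"{old_id} is an image token with no slim equivalent")
    return old_id - NUM_IMAGE_TOKENS

assert NEW_VOCAB == OLD_VOCAB - NUM_IMAGE_TOKENS
assert old_to_slim_id(150004) == 130004   # bos_token_id
assert old_to_slim_id(150005) == 130005   # eos_token_id
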
configuration_chatglm.py CHANGED
@@ -12,6 +12,7 @@ class ChatGLMConfig(PretrainedConfig):
     It is used to instantiate an ChatGLM model according to the specified arguments, defining the model
     architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
     the ChatGLM-6B [THUDM/ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b) architecture.
+    We remove 20K image tokens on top of ChatGLM-6B to save memory.
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used
     to control the model outputs. Read the documentation from [`PretrainedConfig`]
@@ -58,14 +59,14 @@ class ChatGLMConfig(PretrainedConfig):
 
     def __init__(
             self,
-            vocab_size=150528,
+            vocab_size=130528,
             hidden_size=4096,
             num_layers=28,
             num_attention_heads=32,
             layernorm_epsilon=1e-5,
             use_cache=False,
-            bos_token_id=150004,
-            eos_token_id=150005,
+            bos_token_id=130004,
+            eos_token_id=130005,
             pad_token_id=0,
             max_sequence_length=2048,
             inner_hidden_size=16384,
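
The memory saving mentioned in the added docstring line comes from shrinking the [vocab_size, hidden_size] matrices. A back-of-the-envelope sketch using the sizes in this config (hidden_size=4096, float16 weights); the factor of two for slimming both the input embedding and lm_head is an assumption, not something stated in the diff:

# Rough estimate of the parameters freed by dropping 20,000 vocabulary rows.
num_removed = 20_000
hidden_size = 4096
bytes_per_param = 2                        # float16

per_matrix = num_removed * hidden_size * bytes_per_param
print(per_matrix / 2**20)                  # ~156 MiB per [vocab, hidden] matrix
print(2 * per_matrix / 2**20)              # ~312 MiB if both word_embeddings and lm_head are slimmed
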
modeling_chatglm.py CHANGED
@@ -28,7 +28,7 @@ from transformers.utils import logging
 from transformers.generation.logits_process import LogitsProcessor
 from transformers.generation.utils import LogitsProcessorList
 
-from .configuration_chatglm import ChatGLMConfig
+from configuration_chatglm import ChatGLMConfig
 
 # flags required to enable jit fusion kernels
 torch._C._jit_set_profiling_mode(False)
@@ -38,12 +38,13 @@ torch._C._jit_override_can_fuse_on_gpu(True)
 
 logger = logging.get_logger(__name__)
 
-_CHECKPOINT_FOR_DOC = "THUDM/ChatGLM-6B"
+_CHECKPOINT_FOR_DOC = "silver/ChatGLM-6B"
 _CONFIG_FOR_DOC = "ChatGLM6BConfig"
 
 CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "THUDM/chatglm-6b",
+    "silver/chatglm-6b-slim",
     # See all ChatGLM-6B models at https://huggingface.co/models?filter=chatglm
+    # See the slim model at https://huggingface.co/silver/chatglm-6b-slim
 ]
 
 
@@ -51,7 +52,7 @@ class InvalidScoreLogitsProcessor(LogitsProcessor):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
         if torch.isnan(scores).any() or torch.isinf(scores).any():
             scores.zero_()
-            scores[..., 20005] = 5e4
+            scores[..., 5] = 5e4
         return scores
 
 
@@ -755,7 +756,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
 
     @staticmethod
     def get_masks(seq, device):
-        context_length = seq.index(150004) + 1
+        context_length = seq.index(130004) + 1
 
         attention_mask = torch.ones((1, len(seq), len(seq)), device=device)
         attention_mask.tril_()
@@ -766,9 +767,9 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
         return attention_mask
 
     def get_position_ids(self, seq, mask_position, device, gmask=False):
-        context_length = seq.index(150004) + 1
+        context_length = seq.index(130004) + 1
         if self.position_encoding_2d:
-            seq_length = seq.index(150004)
+            seq_length = seq.index(130004)
             position_ids = torch.arange(context_length, dtype=torch.long, device=device)
             if not gmask:
                 position_ids[seq_length:] = mask_position
@@ -824,7 +825,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
         if past_key_values is None:
             past_key_values = tuple([None] * len(self.layers))
 
-        MASK, gMASK = 150000, 150001
+        MASK, gMASK = 130000, 130001
         mask_token = MASK if MASK in input_ids else gMASK
         use_gmask = False if MASK in input_ids else gMASK
         seq = input_ids[0].tolist()
@@ -941,7 +942,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
         attention_mask = (attention_mask < 0.5).bool()
 
         if self.position_encoding_2d:
-            seq_length = seq.index(150004)
+            seq_length = seq.index(130004)
             position_ids = torch.arange(context_length, dtype=torch.long, device=device)
             if not gmask:
                 position_ids[seq_length:] = mask_position
@@ -968,7 +969,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             **kwargs
     ) -> dict:
 
-        MASK, gMASK = 150000, 150001
+        MASK, gMASK = 130000, 130001
         mask_token = MASK if MASK in input_ids else gMASK
         use_gmask = False if MASK in input_ids else gMASK
         seq = input_ids[0].tolist()
@@ -979,7 +980,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
 
         # only last token for input_ids if past is not None
         if past is not None or past_key_values is not None:
-            context_length = seq.index(150004)
+            context_length = seq.index(130004)
             last_token = input_ids[:, -1].unsqueeze(-1)
             if self.position_encoding_2d:
                 position_ids = torch.tensor([[[mask_position], [len(seq) - context_length]]], dtype=torch.long,
@@ -1119,8 +1120,8 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             self,
             **kwargs,
     ):
-        MASK, gMASK = 150000, 150001
-        bos, eos = 150004, 150005
+        MASK, gMASK = 130000, 130001
+        bos, eos = 130004, 130005
 
         if "eos_token_id" not in kwargs:
             kwargs["eos_token_id"] = eos
pytorch_model-00001-of-00008.bin → pytorch_model-00001-of-00008-slim.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe5bac6bfa5b5404ddfe3fabe04862b785e013afd7b308b7beca08239f9489fa
-size 1904491802
+oid sha256:c85647a7f3c817274a767dbee01428a9f1b3eb855cfd7849625b8ad7753e4dbf
+size 1904493208
pytorch_model-00008-of-00008.bin → pytorch_model-00008-of-00008-slim.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e764ebdece24219efeda3c18aa32fe6414da3d1f533df8845815609e9ef7f4a7
-size 1233126123
+oid sha256:36e8039413913b7326c4fc5fcbcd2bf4c03b03bea1ff1bdaf6b74b46df0053e9
+size 1233126329
pytorch_model.bin.index.json CHANGED
@@ -3,35 +3,35 @@
     "total_size": 13744473856
   },
   "weight_map": {
-    "lm_head.weight": "pytorch_model-00008-of-00008.bin",
+    "lm_head.weight": "pytorch_model-00008-of-00008-slim.bin",
     "transformer.final_layernorm.bias": "pytorch_model-00007-of-00008.bin",
     "transformer.final_layernorm.weight": "pytorch_model-00007-of-00008.bin",
-    "transformer.layers.0.attention.dense.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.attention.dense.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.attention.query_key_value.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.attention.query_key_value.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.input_layernorm.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.post_attention_layernorm.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.dense.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.dense.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.query_key_value.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.query_key_value.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.input_layernorm.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00008.bin",
+    "transformer.layers.0.attention.dense.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.attention.dense.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.attention.query_key_value.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.attention.query_key_value.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.input_layernorm.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.post_attention_layernorm.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.dense.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.dense.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.query_key_value.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.query_key_value.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.input_layernorm.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00008-slim.bin",
     "transformer.layers.1.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00008.bin",
     "transformer.layers.1.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00008.bin",
-    "transformer.layers.1.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.post_attention_layernorm.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00008.bin",
+    "transformer.layers.1.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.post_attention_layernorm.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00008-slim.bin",
     "transformer.layers.10.attention.dense.bias": "pytorch_model-00003-of-00008.bin",
     "transformer.layers.10.attention.dense.weight": "pytorch_model-00003-of-00008.bin",
     "transformer.layers.10.attention.query_key_value.bias": "pytorch_model-00003-of-00008.bin",
@@ -370,6 +370,6 @@
     "transformer.layers.9.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00008.bin",
     "transformer.layers.9.post_attention_layernorm.bias": "pytorch_model-00003-of-00008.bin",
     "transformer.layers.9.post_attention_layernorm.weight": "pytorch_model-00003-of-00008.bin",
-    "transformer.word_embeddings.weight": "pytorch_model-00001-of-00008.bin"
+    "transformer.word_embeddings.weight": "pytorch_model-00001-of-00008-slim.bin"
   }
 }
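
Only the entries pointing at the two renamed shards change here; transformers resolves every parameter through this weight_map, so renaming a shard means rewriting each entry that references it. A small sketch of how the index is consumed (assumes the file sits in the current directory):

import json

# Look up which shard stores a given parameter after this commit.
with open("pytorch_model.bin.index.json") as f:
    index = json.load(f)

print(index["weight_map"]["transformer.word_embeddings.weight"])
# "pytorch_model-00001-of-00008-slim.bin"
print(index["weight_map"]["lm_head.weight"])
# "pytorch_model-00008-of-00008-slim.bin"
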
tokenization_chatglm.py CHANGED
@@ -16,7 +16,7 @@ from transformers.utils import logging
 logger = logging.get_logger(__name__)
 
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
-    "THUDM/chatglm-6b": 2048,
+    "silver/chatglm-6b-slim": 2048,
 }
 
 
@@ -85,17 +85,13 @@ class SPTokenizer:
     def get_tab_token():
         return f"<|tab|>"
 
-    @property
-    def num_image_tokens(self):
-        return 20000
-
     @property
     def num_text_tokens(self):
         return self.text_tokenizer.num_tokens
 
     @property
     def num_tokens(self):
-        return self.num_image_tokens + self.num_text_tokens
+        return self.num_text_tokens
 
     @staticmethod
     def _encode_whitespaces(text: str, max_len: int = 80):
@@ -125,11 +121,11 @@ class SPTokenizer:
         if not add_dummy_prefix:
             text = "<n>" + text
         tmp = self._get_text_tokenizer(encode_special_tokens=special_tokens).encode(text)
-        tokens = [x + self.num_image_tokens for x in tmp]
+        tokens = [x for x in tmp]
         return tokens if add_dummy_prefix else tokens[2:]
 
     def decode(self, text_ids: List[int], special_tokens=False) -> str:
-        ids = [int(_id) - self.num_image_tokens for _id in text_ids]
+        ids = [int(_id) for _id in text_ids]
         ids = [_id for _id in ids if _id >= 0]
         text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
         text = text.replace("<n>", "\n")
@@ -156,15 +152,9 @@ class SPTokenizer:
 
     def __getitem__(self, x: Union[int, str]):
         if isinstance(x, int):
-            if x < self.num_image_tokens:
-                return "<image_{}>".format(x)
-            else:
-                return self.text_tokenizer.convert_id_to_token(x - self.num_image_tokens)
+            return self.text_tokenizer.convert_id_to_token(x)
         elif isinstance(x, str):
-            if x.startswith("<image_") and x.endswith(">") and x[7:-1].isdigit():
-                return int(x[7:-1])
-            else:
-                return self.text_tokenizer.convert_token_to_id(x) + self.num_image_tokens
+            return self.text_tokenizer.convert_token_to_id(x)
         else:
             raise ValueError("The key should be str or int.")
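
The tokenizer change removes the 20,000-id offset that the original SPTokenizer added on top of the sentencepiece ids; the slim tokenizer now uses the sentencepiece ids directly. A small illustration of the before/after relationship (the sentencepiece ids below are made up for the example):

NUM_IMAGE_TOKENS = 20_000
sp_ids = [5, 64286, 12, 74]                        # hypothetical sentencepiece output

old_ids = [x + NUM_IMAGE_TOKENS for x in sp_ids]   # chatglm-6b encode()
new_ids = list(sp_ids)                             # chatglm-6b-slim encode()

assert all(o - n == NUM_IMAGE_TOKENS for o, n in zip(old_ids, new_ids))
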
tokenizer_config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "name_or_path": "THUDM/chatglm-6b",
+  "name_or_path": "silver/chatglm-6b-slim",
   "bos_token": "<sop>",
   "eop_token": "<eop>",
   "eos_token": "</s>",
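
With these changes the checkpoint loads through the custom classes listed in config.json's auto_map. A usage sketch that mirrors the original ChatGLM-6B instructions (assumes a CUDA GPU with enough memory for the fp16 weights):

from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("silver/chatglm-6b-slim", trust_remote_code=True)
model = AutoModel.from_pretrained("silver/chatglm-6b-slim", trust_remote_code=True).half().cuda()
model.eval()

# Chat helper provided by the remote ChatGLM code.
response, history = model.chat(tokenizer, "Hello", history=[])
print(response)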