Charlie committed
Commit af2f908
1 Parent(s): fbed214

Update Readme

Files changed (2)
  1. README.md +1 -2
  2. modeling_batgpt.py +0 -50
README.md CHANGED
@@ -9,8 +9,7 @@ pipeline_tag: text-generation
 inference: false
 ---
 # BatGPT-15B-sirius
-
-<!-- Provide a quick summary of what the model is/does. -->
+Bidirectional Autoregressive Talker from Generative Pre-trained Transformer
 
 ## 介绍 (Introduction)
 
modeling_batgpt.py CHANGED
@@ -940,56 +940,6 @@ class BatGPTForCausalLM(BatGPTPreTrainedModel):
             for layer_past in past
         )
 
-
-    def quantize(self, bits: int):
-        try:
-            # from .quantizer import QLinear
-            from quantizer import QLinear
-        except ImportError:
-            raise ImportError(
-                f"Needs QLinear to run quantize."
-            )
-
-        for layer in self.model.encoder.layers:
-            layer.self_attention.query_proj = QLinear(
-                bits=bits,
-                weight=layer.self_attention.query_proj.weight,
-                bias = layer.self_attention.query_proj.bias if self.config.qkv_bias else None,
-            )
-            layer.self_attention.key_proj = QLinear(
-                bits=bits,
-                weight=layer.self_attention.key_proj.weight,
-                bias = layer.self_attention.key_proj.bias if self.config.qkv_bias else None,
-            )
-            layer.self_attention.value_proj = QLinear(
-                bits=bits,
-                weight=layer.self_attention.value_proj.weight,
-                bias = layer.self_attention.value_proj.bias if self.config.qkv_bias else None,
-            )
-            layer.self_attention.dense = QLinear(
-                bits=bits,
-                weight=layer.self_attention.dense.weight,
-                bias = None,
-            )
-            layer.mlp.dense_h_to_4h = QLinear(
-                bits=bits,
-                weight=layer.mlp.dense_h_to_4h.weight,
-                bias = None,
-            )
-            layer.mlp.dense_4h_to_h = QLinear(
-                bits=bits,
-                weight=layer.mlp.dense_4h_to_h.weight,
-                bias = None,
-            )
-            if self.config.mlp_activation == "silu":
-                layer.mlp.gate_proj = QLinear(
-                    bits=bits,
-                    weight=layer.mlp.gate_proj.weight,
-                    bias = None,
-                )
-        return self
-
-
     def process_response(self, response):
         response = response.strip()
         return response
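
For reference, the removed quantize method wrapped each attention projection (query/key/value/dense) and MLP projection of every encoder layer in a QLinear module at the requested bit width, and returned self so the call could be chained. A minimal sketch of how it could have been invoked before this commit, assuming the model is loaded with trust_remote_code so this modeling_batgpt.py (and its quantizer.py with QLinear) is used; the repo id below is hypothetical:

from transformers import AutoModelForCausalLM

# Hypothetical repo id; substitute the actual BatGPT-15B-sirius hub path.
model = AutoModelForCausalLM.from_pretrained(
    "your-org/BatGPT-15B-sirius",
    trust_remote_code=True,  # pulls in the custom modeling_batgpt.py
)

# Before this commit: swap the q/k/v/dense and MLP projections for QLinear
# at the requested bit width; quantize() returned self, so the call chains.
# After this commit the method no longer exists and this raises AttributeError.
model = model.quantize(8)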