svakhreev committed on
Commit 20f9874
1 Parent(s): 24b6124

Upload GPTRefactForCausalLM (#5)

- Upload GPTRefactForCausalLM (a4ba9934cfef545c1a36eed0f94b50b431bbd746)

configuration_gpt_refact.py CHANGED
@@ -32,7 +32,7 @@ class GPTRefactConfig(PretrainedConfig):
         use_cache=True,
         bos_token_id=-1,
         eos_token_id=0,
-        max_position_embeddings: int = 2048,
+        max_position_embeddings: int = 4096,
         multi_query: bool = True,
         attention_softmax_in_fp32=False,
         scale_attention_softmax_in_fp32=False,
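
A minimal sketch of how the bumped default context length surfaces at the config level, assuming configuration_gpt_refact.py is importable locally and that all constructor arguments have defaults (as the kwargs visible in the diff do); everything besides max_position_embeddings is illustrative:

# Sketch only: GPTRefactConfig is the class from this repo's configuration file;
# the 4096 default and the max_position_embeddings kwarg come from the diff above.
from configuration_gpt_refact import GPTRefactConfig

config = GPTRefactConfig()                                   # new default: 4096 positions
assert config.max_position_embeddings == 4096

short_ctx = GPTRefactConfig(max_position_embeddings=2048)    # explicitly restores the old limit
print(config.max_position_embeddings, short_ctx.max_position_embeddings)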
modeling_gpt_refact.py CHANGED
@@ -346,9 +346,10 @@ class GPTRefactModel(GPTRefactPreTrainedModel):
 
         self.h = nn.ModuleList([GPTRefactBlock(config, layer_idx=i) for i in range(config.num_hidden_layers)])
 
-        max_positions = config.max_position_embeddings
+        self.max_positions = config.max_position_embeddings
         self.register_buffer(
-            "bias", torch.tril(torch.ones((max_positions, max_positions), dtype=torch.bool)), persistent=False
+            "bias", torch.tril(torch.ones((self.max_positions, self.max_positions), dtype=torch.bool)),
+            persistent=False
         )
 
         self.gradient_checkpointing = False
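
The "bias" buffer registered here is a lower-triangular boolean causal mask: registering it with persistent=False keeps it on the module (so it follows .to(device)) without writing it into the state dict. A standalone sketch of the same pattern, with illustrative sizes not taken from the model:

import torch

# max_positions and seq_len are made-up values; only the tril-mask pattern mirrors the diff.
max_positions = 8
bias = torch.tril(torch.ones((max_positions, max_positions), dtype=torch.bool))

seq_len = 4
causal_mask = bias[:seq_len, :seq_len]  # each position may attend only to itself and earlier positions
print(causal_mask)
# tensor([[ True, False, False, False],
#         [ True,  True, False, False],
#         [ True,  True,  True, False],
#         [ True,  True,  True,  True]])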
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2369c7e2228204ac8e0bc39c048d1e6349ce5f1bab8005a60bde0f0aa26ca73
+oid sha256:81388e4a168bb437a7a09af6c8b6c2943990276ee62c2f449cd2bdff257e8860
 size 6343461637
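
The Git LFS pointer change means the checkpoint blob itself was replaced; the oid is the SHA-256 of the file contents. A sketch for verifying a downloaded pytorch_model.bin against the new oid, assuming the file sits in the working directory (the expected digest comes from the pointer above; the chunk size is arbitrary):

import hashlib

EXPECTED = "81388e4a168bb437a7a09af6c8b6c2943990276ee62c2f449cd2bdff257e8860"

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checkpoint does not match the LFS pointer"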