ccdv committed
Commit
3aadd6d
1 Parent(s): 883b99c

add mask_first_token

Files changed (3)
  1. README.md +5 -1
  2. config.json +1 -0
  3. modeling_lsg_pegasus.py +5 -0
README.md CHANGED
@@ -50,13 +50,17 @@ You can change various parameters like :
 Default parameters work well in practice. If you are short on memory, reduce block sizes, increase sparsity factor and remove dropout in the attention score matrix.
 
 ```python
+from transformers import AutoModel
+
 model = AutoModel.from_pretrained("ccdv/lsg-pegasus-large-4096",
     trust_remote_code=True,
     num_global_tokens=16,
     block_size=64,
     sparse_block_size=64,
-    sparsity_factor=4,
-    attention_probs_dropout_prob=0.0
+    attention_probs_dropout_prob=0.0,
+    sparsity_factor=4,
+    sparsity_type="none",
+    mask_first_token=True
 )
 ```
 
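For reference, the new `mask_first_token` option can be exercised directly at load time. A minimal sketch, assuming `transformers` is installed, the Hub checkpoint is reachable, and you opt in to the repo's remote code:

```python
from transformers import AutoModel

# Load the checkpoint with the new flag enabled and confirm
# it reached the model config.
model = AutoModel.from_pretrained(
    "ccdv/lsg-pegasus-large-4096",
    trust_remote_code=True,
    mask_first_token=True,
)
print(model.config.mask_first_token)  # True
```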
config.json CHANGED
@@ -50,6 +50,7 @@
   },
   "length_penalty": 0.8,
   "lsh_num_pre_rounds": 1,
+  "mask_first_token": false,
   "max_length": 256,
   "max_position_embeddings": 4096,
   "model_type": "pegasus",
modeling_lsg_pegasus.py CHANGED
@@ -29,6 +29,7 @@ class LSGPegasusConfig(PegasusConfig):
         base_model_prefix="lsg",
         block_size=128,
         lsh_num_pre_rounds=1,
+        mask_first_token=False,
         num_global_tokens=1,
         pass_global_tokens_to_decoder=True,
         pool_with_global=True,
@@ -45,6 +46,7 @@ class LSGPegasusConfig(PegasusConfig):
         self.base_model_prefix = base_model_prefix
         self.block_size = block_size
         self.lsh_num_pre_rounds = lsh_num_pre_rounds
+        self.mask_first_token = mask_first_token
         self.num_global_tokens = num_global_tokens
         self.pass_global_tokens_to_decoder = pass_global_tokens_to_decoder
         self.pool_with_global = pool_with_global
@@ -723,6 +725,7 @@ class LSGPegasusEncoder(LSGPegasusPreTrainedModel, PegasusEncoder):
         assert hasattr(config, "block_size") and hasattr(config, "adaptive")
         self.block_size = config.block_size
         self.adaptive = config.adaptive
+        self.mask_first_token = config.mask_first_token
         self.pool_with_global = config.pool_with_global
         self.pass_global_tokens_to_decoder = config.pass_global_tokens_to_decoder
 
@@ -770,6 +773,8 @@ class LSGPegasusEncoder(LSGPegasusPreTrainedModel, PegasusEncoder):
 
         if attention_mask is None:
             attention_mask = torch.ones(n, t, device=inputs_.device)
+        if self.mask_first_token:
+            attention_mask[:,0] = 0
 
         b = self.block_size * 2
         pad = t % self.block_size
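The encoder change itself is small: when no mask is supplied, an all-ones mask is built, and the new flag then zeroes position 0 so the first token is ignored by attention. A standalone sketch of just that logic outside the encoder (the helper name and shapes are illustrative, not part of the commit):

```python
import torch

def build_attention_mask(n: int, t: int, mask_first_token: bool = False) -> torch.Tensor:
    # Mirrors the lines added in this commit: start from an all-ones
    # (batch, seq_len) mask, then zero out position 0 when the flag is set.
    attention_mask = torch.ones(n, t)
    if mask_first_token:
        attention_mask[:, 0] = 0
    return attention_mask

print(build_attention_mask(2, 6, mask_first_token=True))
# tensor([[0., 1., 1., 1., 1., 1.],
#         [0., 1., 1., 1., 1., 1.]])
```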