aframson committed on
Commit
c7409ca
·
1 Parent(s): 095c6bb
Files changed (2)
  1. modelConfig.py +42 -0
  2. modelLM.py +49 -0
modelConfig.py ADDED
@@ -0,0 +1,42 @@
+
+ from transformers import PretrainedConfig
+
+ class OBIConfig(PretrainedConfig):
+     def __init__(self,
+                  model_type="OBILanguageModel",
+                  auto_map={
+                      "AutoConfig": "modelConfig.OBIConfig",
+                      "AutoModel": "modelLM.OBILanguageModel",
+                      "AutoModelForCausalLM": "modelLM.OBILanguageModel",
+                      "AutoModelForQuestionAnswering": "modelLM.OBILanguageModel"
+                  },
+                  vocab_size=1000,
+                  hidden_size=4,
+                  num_attention_heads=2,
+                  num_hidden_layers=2,
+                  hidden_dropout_prob=0.1,
+                  block_size=100,
+                  batch_size=60,
+                  max_iters=200,
+                  eval_interval=100,
+                  learning_rate=0.001,
+                  device="cpu",
+                  **kwargs
+                  ) -> None:
+         super().__init__(**kwargs)
+         self.model_type = model_type
+         self.auto_map = auto_map
+         self.vocab_size = vocab_size
+         self.hidden_size = hidden_size
+         self.num_attention_heads = num_attention_heads
+         self.num_hidden_layers = num_hidden_layers
+         self.hidden_dropout_prob = hidden_dropout_prob
+         self.block_size = block_size
+         self.batch_size = batch_size
+         self.max_iters = max_iters
+         self.eval_interval = eval_interval
+         self.learning_rate = learning_rate
+         self.device = device
+
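As a quick sanity check of the config on its own, here is a minimal sketch (the snippet and the "obi-test" output directory are assumptions, not part of this commit); save_pretrained and from_pretrained are inherited from PretrainedConfig and round-trip the fields defined above:

from modelConfig import OBIConfig

# Instantiate with the defaults defined in the commit, overriding one field for illustration.
config = OBIConfig(hidden_size=8)

# Write config.json to an assumed local directory and load it back.
config.save_pretrained("obi-test")
reloaded = OBIConfig.from_pretrained("obi-test")
print(reloaded.hidden_size, reloaded.block_size)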
modelLM.py ADDED
@@ -0,0 +1,49 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from transformers.modeling_utils import PreTrainedModel
+
+ # Custom language model wrapped in PreTrainedModel so it works with the HF auto classes.
+ class OBILanguageModel(PreTrainedModel):
+     def __init__(self, config):
+         super().__init__(config)
+         self.token_embedding_table = nn.Embedding(config.vocab_size, config.hidden_size)  # use length of SentencePiece vocab
+         self.position_embedding_table = nn.Embedding(config.block_size, config.hidden_size)
+         self.transformer = nn.Transformer(
+             d_model=config.hidden_size,
+             nhead=config.num_attention_heads,
+             num_encoder_layers=config.num_hidden_layers,
+             num_decoder_layers=config.num_hidden_layers,
+             dim_feedforward=4 * config.hidden_size,
+             dropout=config.hidden_dropout_prob,
+             activation='gelu',
+             batch_first=True  # the embeddings below are (batch, seq, hidden)
+         )
+         self.ln1 = nn.LayerNorm(config.hidden_size)
+         self.ln2 = nn.LayerNorm(config.hidden_size)
+         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)  # project back to vocab size
+
+     def forward(self, idx, targets=None):
+         # Sum token embeddings with learned position embeddings for the current sequence length.
+         tok_emb = self.token_embedding_table(idx)
+         pos_emb = self.position_embedding_table(torch.arange(idx.size(1), device=idx.device))
+         x = tok_emb + pos_emb
+         x = self.transformer(x, x)
+         x = self.ln1(x)
+         x = self.ln2(x)
+         logits = self.lm_head(x)
+
+         if targets is None:
+             loss = None
+         else:
+             loss = F.cross_entropy(logits.view(-1, self.config.vocab_size), targets.view(-1))
+
+         return logits, loss
+
+     def generate(self, idx, max_new_tokens):
+         # Autoregressive sampling: repeatedly condition on the last block_size tokens.
+         for _ in range(max_new_tokens):
+             idx_cond = idx[:, -self.config.block_size:]
+             logits, loss = self(idx_cond)
+             logits = logits[:, -1, :]  # keep only the last position
+             probs = F.softmax(logits, dim=-1)
+             idx_next = torch.multinomial(probs, num_samples=1)
+             idx = torch.cat((idx, idx_next), dim=1)
+         return idx
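To see the two files working together, here is a minimal end-to-end sketch (the dummy prompt and the token counts are illustrative assumptions, not part of this commit):

import torch
from modelConfig import OBIConfig
from modelLM import OBILanguageModel

config = OBIConfig()
model = OBILanguageModel(config)
model.eval()

# Dummy prompt: one sequence of 10 token ids drawn from the configured vocabulary.
prompt = torch.randint(0, config.vocab_size, (1, 10))

# forward returns (logits, loss); loss is None because no targets are passed.
logits, loss = model(prompt)
print(logits.shape)  # expected: torch.Size([1, 10, 1000])

# Sample 20 more tokens with the custom generate loop defined above.
with torch.no_grad():
    out = model.generate(prompt, max_new_tokens=20)
print(out.shape)  # expected: torch.Size([1, 30])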