Gong Baitao committed
Commit 693c995
Parent: b8e0001

Update modeling_cpmbee.py and README.md

Files changed (2):
  1. README.md +36 -1
  2. modeling_cpmbee.py +2 -2
README.md CHANGED
@@ -68,4 +68,39 @@ res = model.generate(
 )
 print(res)
 
-```
+```
+
+We suggest using `bmtrain` to fine-tune CPM-Bee; you can also use `accelerate` or `deepspeed`. Below is a brief example of a training loop:
+
+```python
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from accelerate import Accelerator
+from torch.utils.data import Dataset, DataLoader
+
+accelerator = Accelerator()
+
+trainset = Dataset()  # Make sure trainset.__getitem__() returns data in the correct format, e.g. {"input": "...", "<ans>": ""}
+# For details, see https://github.com/OpenBMB/CPM-Bee/tree/main/tutorials/basic_task_finetune
+train_loader = DataLoader(trainset, batch_size=1)
+
+tokenizer = AutoTokenizer.from_pretrained("openbmb/cpm-bee-1b", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("openbmb/cpm-bee-1b", trust_remote_code=True).cuda()
+
+optimizer = torch.optim.Adam(model.parameters())
+
+model, optimizer, train_loader = accelerator.prepare(
+    model, optimizer, train_loader
+)
+
+for step, data in enumerate(train_loader):
+    optimizer.zero_grad()
+
+    # Convert the raw data into a trainable (tokenized and padded) format.
+    input_encoded = tokenizer.prepare_for_finetune(data, max_length=512).to(model.device)
+
+    outputs = model(**input_encoded)
+    loss = outputs.loss
+    accelerator.backward(loss)
+    optimizer.step()
+```
+You should design your own parallelism and mixed-precision training strategy on top of this example.
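For instance, here is a minimal sketch of one such strategy using standard Accelerate features (the `mixed_precision` and `gradient_accumulation_steps` arguments, `accelerator.accumulate()`, and `accelerate launch` are part of the `accelerate` library, not of this commit):

```python
from accelerate import Accelerator

# One possible setup: bf16 mixed precision plus gradient accumulation.
# Running the script via `accelerate launch train.py` adds data
# parallelism across all visible GPUs with no further code changes.
accelerator = Accelerator(mixed_precision="bf16", gradient_accumulation_steps=4)

# In the loop above, wrapping each step in accelerator.accumulate(model)
# makes Accelerate synchronize and apply gradients only every 4 batches:
# with accelerator.accumulate(model):
#     outputs = model(**input_encoded)
#     accelerator.backward(outputs.loss)
#     optimizer.step()
#     optimizer.zero_grad()
```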
modeling_cpmbee.py CHANGED
@@ -569,10 +569,10 @@ class CpmBeeRotaryEmbedding(nn.Module):
         self.inv_freq = inv_freq.to(config.torch_dtype)
 
     def forward(self, x: torch.Tensor, x_pos: torch.Tensor):
-        inv_freq = self.inv_freq.to(device=x.device, dtype=self.dtype)
+        inv_freq = self.inv_freq.to(device=x.device, dtype=x.dtype)
 
         x_pos = x_pos * self.distance_scale
-        freqs = x_pos[..., None].to(self.dtype) * inv_freq[None, :]  # (..., dim/2)
+        freqs = x_pos[..., None] * inv_freq[None, :]  # (..., dim/2)
 
         emb = torch.cat((freqs, freqs), dim=-1)  # (..., dim)
         emb_cos = emb.cos()  # (..., dim)
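The fix above casts `inv_freq` to the runtime input's dtype (`x.dtype`) instead of a stored `self.dtype`, so half-precision inputs no longer hit a dtype mismatch. Here is a minimal self-contained sketch of the fixed forward path (the `dim`, base `10000`, and `distance_scale` values are illustrative, not taken from the model config):

```python
import torch
from torch import nn

class RotarySketch(nn.Module):
    """Illustrative rotary embedding with the post-fix dtype handling."""

    def __init__(self, dim: int = 64, distance_scale: float = 1.0):
        super().__init__()
        # Standard RoPE inverse frequencies; base 10000 is a common choice.
        self.inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim))
        self.distance_scale = distance_scale

    def forward(self, x: torch.Tensor, x_pos: torch.Tensor):
        # As in the fix: follow the input's device and dtype at call time.
        inv_freq = self.inv_freq.to(device=x.device, dtype=x.dtype)
        x_pos = x_pos * self.distance_scale
        freqs = x_pos[..., None] * inv_freq[None, :]  # (..., dim/2)
        emb = torch.cat((freqs, freqs), dim=-1)       # (..., dim)
        return emb.cos(), emb.sin()

# fp32 here; under mixed precision, x would arrive as fp16/bf16 and the
# embeddings would follow its dtype automatically.
rope = RotarySketch()
x = torch.randn(2, 8, 64)
cos, sin = rope(x, torch.arange(8).float())
```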