Gong Baitao committed
Commit 9542344
1 Parent(s): a5af69e

Update modeling_cpmbee.py and README.md

Files changed (2):
  1. README.md +35 -0
  2. modeling_cpmbee.py +2 -2
README.md CHANGED
@@ -68,3 +68,38 @@ res = model.generate(
 print(res)
 
 ```
+
+ We suggest using `bmtrain` to fine-tune CPM-Bee. You can also fine-tune it with `accelerate` or `deepspeed`. Here is a brief example of a training loop:
+
+ ```python
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from accelerate import Accelerator
+ from torch.utils.data import Dataset, DataLoader
+
+ accelerator = Accelerator()
+
+ trainset = Dataset()  # make sure trainset.__getitem__() returns records in the expected format, e.g. {"input": "...", "<ans>": ""}
+ # for details, see https://github.com/OpenBMB/CPM-Bee/tree/main/tutorials/basic_task_finetune (a minimal dataset sketch follows after this diff)
+ train_loader = DataLoader(trainset, batch_size=1)
+
+ tokenizer = AutoTokenizer.from_pretrained("openbmb/cpm-bee-1b", trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained("openbmb/cpm-bee-1b", trust_remote_code=True).cuda()
+
+ optimizer = torch.optim.Adam(model.parameters())
+
+ model, optimizer, train_loader = accelerator.prepare(
+     model, optimizer, train_loader
+ )
+
+ for step, data in enumerate(train_loader):
+     optimizer.zero_grad()
+
+     # convert the raw record into a tokenized, trainable batch
+     input_encoded = tokenizer.prepare_for_finetune(data, max_length=512).to(model.device)
+
+     outputs = model(**input_encoded)
+     loss = outputs.loss
+     accelerator.backward(loss)
+     optimizer.step()
+ ```
+ You should design your own parallelism and mixed-precision training strategy on top of this example.
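As a rough companion to the loop above, here is a minimal sketch of a dataset yielding records in the format the comment describes, together with a mixed-precision `Accelerator`. The `FinetuneDataset` class and the placeholder record are hypothetical illustrations, not part of CPM-Bee; the exact fields the model expects are documented in the basic_task_finetune tutorial linked in the diff.

```python
import torch
from torch.utils.data import Dataset, DataLoader
from accelerate import Accelerator

class FinetuneDataset(Dataset):  # hypothetical helper, not part of CPM-Bee
    """Serves CPM-Bee-style records such as {"input": "...", "<ans>": ""}."""

    def __init__(self, records):
        self.records = records

    def __len__(self):
        return len(self.records)

    def __getitem__(self, idx):
        return self.records[idx]

# mixed_precision is a standard Accelerator option ("no", "fp16", or "bf16");
# fp16 requires a GPU at runtime
accelerator = Accelerator(mixed_precision="fp16")

trainset = FinetuneDataset([
    {"input": "...", "<ans>": ""},  # placeholder record, mirroring the comment in the diff
])
train_loader = DataLoader(trainset, batch_size=1)
```

For multi-GPU runs, the same script is typically started with `accelerate launch`, which lets `accelerate` supply the distributed configuration.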
modeling_cpmbee.py CHANGED
@@ -569,10 +569,10 @@ class CpmBeeRotaryEmbedding(nn.Module):
         self.inv_freq = inv_freq.to(config.torch_dtype)
 
     def forward(self, x: torch.Tensor, x_pos: torch.Tensor):
-        inv_freq = self.inv_freq.to(device=x.device, dtype=self.dtype)
+        inv_freq = self.inv_freq.to(device=x.device, dtype=x.dtype)
 
         x_pos = x_pos * self.distance_scale
-        freqs = x_pos[..., None].to(self.dtype) * inv_freq[None, :]  # (..., dim/2)
+        freqs = x_pos[..., None] * inv_freq[None, :]  # (..., dim/2)
 
         emb = torch.cat((freqs, freqs), dim=-1)  # (..., dim)
         emb_cos = emb.cos()  # (..., dim)
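For context on the modeling fix: the removed lines read `self.dtype`, an attribute the snippet shown never defines, and forced `freqs` into that dtype; the replacement instead casts the cached `inv_freq` to the input tensor's dtype. Below is a simplified, self-contained sketch of the corrected forward; the class body is a reconstruction for illustration (the `dim`, `base`, and `distance_scale` initialization is assumed), not a copy of the full file.

```python
import torch
from torch import nn

class RotaryEmbeddingSketch(nn.Module):  # simplified stand-in for CpmBeeRotaryEmbedding
    def __init__(self, dim, base=10000, distance_scale=1.0):
        super().__init__()
        # standard RoPE inverse frequencies, one per pair of dimensions
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
        self.register_buffer("inv_freq", inv_freq)
        self.distance_scale = distance_scale

    def forward(self, x: torch.Tensor, x_pos: torch.Tensor):
        # the fix: follow the input's device and dtype rather than an unset self.dtype
        inv_freq = self.inv_freq.to(device=x.device, dtype=x.dtype)
        x_pos = x_pos * self.distance_scale
        freqs = x_pos[..., None] * inv_freq[None, :]  # (..., dim/2)
        emb = torch.cat((freqs, freqs), dim=-1)       # (..., dim)
        return emb.cos(), emb.sin()

# e.g. with a half-precision input, the embeddings now come out in float16 as well
m = RotaryEmbeddingSketch(dim=64)
x = torch.randn(2, 8, 64, dtype=torch.float16)
pos = torch.arange(8, dtype=torch.float16)
cos, sin = m(x, pos)
assert cos.dtype == torch.float16
```

With the removed lines, the same call would raise an `AttributeError` on `self.dtype` unless that attribute is set elsewhere in the full file.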