huseinzol05 committed on
Commit
a406da2
1 Parent(s): 0fcb1d9

Upload ConformerEncoder

Browse files
Files changed (3) hide show
  1. config.json +3 -0
  2. conformer.py +30 -6
  3. model.safetensors +1 -1
config.json CHANGED
@@ -12,9 +12,12 @@
12
  "conformer_input_dim": 144,
13
  "conformer_num_heads": 4,
14
  "conformer_num_layers": 8,
 
 
15
  "input_dim": 80,
16
  "model_type": "conformer",
17
  "output_dim": 40,
 
18
  "time_reduction_stride": 4,
19
  "torch_dtype": "float32",
20
  "transformers_version": "4.35.2"
 
12
  "conformer_input_dim": 144,
13
  "conformer_num_heads": 4,
14
  "conformer_num_layers": 8,
15
+ "ctc_loss_reduction": "mean",
16
+ "ctc_zero_infinity": true,
17
  "input_dim": 80,
18
  "model_type": "conformer",
19
  "output_dim": 40,
20
+ "pad_token_id": 39,
21
  "time_reduction_stride": 4,
22
  "torch_dtype": "float32",
23
  "transformers_version": "4.35.2"
conformer.py CHANGED
@@ -2,6 +2,7 @@ from torchaudio.models import Conformer
2
  from torchaudio.models.rnnt import _TimeReduction
3
  from transformers import PretrainedConfig, PreTrainedModel
4
  import torch
 
5
  from typing import List, Tuple, Optional
6
 
7
 
@@ -33,10 +34,33 @@ class ConformerEncoder(PreTrainedModel):
33
  )
34
  self.output_linear = torch.nn.Linear(config.conformer_input_dim, config.output_dim)
35
 
36
- def forward(self, input: torch.Tensor,
37
- lengths: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
38
- time_reduction_out, time_reduction_lengths = self.time_reduction(input, lengths)
39
  input_linear_out = self.input_linear(time_reduction_out)
40
- x, lengths = self.conformer(input_linear_out, time_reduction_lengths)
41
- output_linear_out = self.output_linear(x)
42
- return output_linear_out, lengths
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from torchaudio.models.rnnt import _TimeReduction
3
  from transformers import PretrainedConfig, PreTrainedModel
4
  import torch
5
+ from torch import nn
6
  from typing import List, Tuple, Optional
7
 
8
 
 
34
  )
35
  self.output_linear = torch.nn.Linear(config.conformer_input_dim, config.output_dim)
36
 
37
+ def forward(self, inputs, lengths, labels=None):
38
+ time_reduction_out, time_reduction_lengths = self.time_reduction(inputs, lengths)
 
39
  input_linear_out = self.input_linear(time_reduction_out)
40
+ x, input_lengths = self.conformer(input_linear_out, time_reduction_lengths)
41
+ logits = self.output_linear(x)
42
+
43
+ loss = None
44
+ if labels is not None:
45
+ labels_mask = labels >= 0
46
+ target_lengths = labels_mask.sum(-1)
47
+ flattened_targets = labels.masked_select(labels_mask)
48
+ log_probs = nn.functional.log_softmax(
49
+ logits,
50
+ dim=-1,
51
+ dtype=torch.float32
52
+ ).transpose(0, 1)
53
+
54
+ with torch.backends.cudnn.flags(enabled=False):
55
+ loss = nn.functional.ctc_loss(
56
+ log_probs,
57
+ flattened_targets,
58
+ input_lengths,
59
+ target_lengths,
60
+ blank=self.config.pad_token_id,
61
+ reduction=self.config.ctc_loss_reduction,
62
+ zero_infinity=self.config.ctc_zero_infinity,
63
+ )
64
+
65
+ output = (logits, input_lengths)
66
+ return ((loss,) + output) if loss is not None else output
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c48c1cb2f04deb1086872dfa916773384f36d7a755c2ee220170290851ddfd46
3
  size 15780592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4750d570d1888762e0c5c89883addd1ef8914ff0a46b9c85ea931c982f85285a
3
  size 15780592