Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Commit 
							
							·
						
						b32773b
	
1
								Parent(s):
							
							64f5a64
								
Delete TTS/vocoder/models/deepmind_version.py
Browse files
    	
        TTS/vocoder/models/deepmind_version.py
    DELETED
    
    | 
         @@ -1,170 +0,0 @@ 
     | 
|
| 1 | 
         
            -
            import torch
         
     | 
| 2 | 
         
            -
            import torch.nn as nn
         
     | 
| 3 | 
         
            -
            import torch.nn.functional as F
         
     | 
| 4 | 
         
            -
            from utils.display import *
         
     | 
| 5 | 
         
            -
            from utils.dsp import *
         
     | 
| 6 | 
         
            -
             
     | 
| 7 | 
         
            -
             
     | 
| 8 | 
         
            -
            class WaveRNN(nn.Module) :
         
     | 
| 9 | 
         
            -
                def __init__(self, hidden_size=896, quantisation=256) :
         
     | 
| 10 | 
         
            -
                    super(WaveRNN, self).__init__()
         
     | 
| 11 | 
         
            -
                    
         
     | 
| 12 | 
         
            -
                    self.hidden_size = hidden_size
         
     | 
| 13 | 
         
            -
                    self.split_size = hidden_size // 2
         
     | 
| 14 | 
         
            -
                    
         
     | 
| 15 | 
         
            -
                    # The main matmul
         
     | 
| 16 | 
         
            -
                    self.R = nn.Linear(self.hidden_size, 3 * self.hidden_size, bias=False)
         
     | 
| 17 | 
         
            -
                    
         
     | 
| 18 | 
         
            -
                    # Output fc layers
         
     | 
| 19 | 
         
            -
                    self.O1 = nn.Linear(self.split_size, self.split_size)
         
     | 
| 20 | 
         
            -
                    self.O2 = nn.Linear(self.split_size, quantisation)
         
     | 
| 21 | 
         
            -
                    self.O3 = nn.Linear(self.split_size, self.split_size)
         
     | 
| 22 | 
         
            -
                    self.O4 = nn.Linear(self.split_size, quantisation)
         
     | 
| 23 | 
         
            -
                    
         
     | 
| 24 | 
         
            -
                    # Input fc layers
         
     | 
| 25 | 
         
            -
                    self.I_coarse = nn.Linear(2, 3 * self.split_size, bias=False)
         
     | 
| 26 | 
         
            -
                    self.I_fine = nn.Linear(3, 3 * self.split_size, bias=False)
         
     | 
| 27 | 
         
            -
             
     | 
| 28 | 
         
            -
                    # biases for the gates
         
     | 
| 29 | 
         
            -
                    self.bias_u = nn.Parameter(torch.zeros(self.hidden_size))
         
     | 
| 30 | 
         
            -
                    self.bias_r = nn.Parameter(torch.zeros(self.hidden_size))
         
     | 
| 31 | 
         
            -
                    self.bias_e = nn.Parameter(torch.zeros(self.hidden_size))
         
     | 
| 32 | 
         
            -
                    
         
     | 
| 33 | 
         
            -
                    # display num params
         
     | 
| 34 | 
         
            -
                    self.num_params()
         
     | 
| 35 | 
         
            -
             
     | 
| 36 | 
         
            -
                    
         
     | 
| 37 | 
         
            -
                def forward(self, prev_y, prev_hidden, current_coarse) :
         
     | 
| 38 | 
         
            -
                    
         
     | 
| 39 | 
         
            -
                    # Main matmul - the projection is split 3 ways
         
     | 
| 40 | 
         
            -
                    R_hidden = self.R(prev_hidden)
         
     | 
| 41 | 
         
            -
                    R_u, R_r, R_e, = torch.split(R_hidden, self.hidden_size, dim=1)
         
     | 
| 42 | 
         
            -
                    
         
     | 
| 43 | 
         
            -
                    # Project the prev input 
         
     | 
| 44 | 
         
            -
                    coarse_input_proj = self.I_coarse(prev_y)
         
     | 
| 45 | 
         
            -
                    I_coarse_u, I_coarse_r, I_coarse_e = \
         
     | 
| 46 | 
         
            -
                        torch.split(coarse_input_proj, self.split_size, dim=1)
         
     | 
| 47 | 
         
            -
                    
         
     | 
| 48 | 
         
            -
                    # Project the prev input and current coarse sample
         
     | 
| 49 | 
         
            -
                    fine_input = torch.cat([prev_y, current_coarse], dim=1)
         
     | 
| 50 | 
         
            -
                    fine_input_proj = self.I_fine(fine_input)
         
     | 
| 51 | 
         
            -
                    I_fine_u, I_fine_r, I_fine_e = \
         
     | 
| 52 | 
         
            -
                        torch.split(fine_input_proj, self.split_size, dim=1)
         
     | 
| 53 | 
         
            -
                    
         
     | 
| 54 | 
         
            -
                    # concatenate for the gates
         
     | 
| 55 | 
         
            -
                    I_u = torch.cat([I_coarse_u, I_fine_u], dim=1)
         
     | 
| 56 | 
         
            -
                    I_r = torch.cat([I_coarse_r, I_fine_r], dim=1)
         
     | 
| 57 | 
         
            -
                    I_e = torch.cat([I_coarse_e, I_fine_e], dim=1)
         
     | 
| 58 | 
         
            -
                    
         
     | 
| 59 | 
         
            -
                    # Compute all gates for coarse and fine 
         
     | 
| 60 | 
         
            -
                    u = F.sigmoid(R_u + I_u + self.bias_u)
         
     | 
| 61 | 
         
            -
                    r = F.sigmoid(R_r + I_r + self.bias_r)
         
     | 
| 62 | 
         
            -
                    e = F.tanh(r * R_e + I_e + self.bias_e)
         
     | 
| 63 | 
         
            -
                    hidden = u * prev_hidden + (1. - u) * e
         
     | 
| 64 | 
         
            -
                    
         
     | 
| 65 | 
         
            -
                    # Split the hidden state
         
     | 
| 66 | 
         
            -
                    hidden_coarse, hidden_fine = torch.split(hidden, self.split_size, dim=1)
         
     | 
| 67 | 
         
            -
                    
         
     | 
| 68 | 
         
            -
                    # Compute outputs 
         
     | 
| 69 | 
         
            -
                    out_coarse = self.O2(F.relu(self.O1(hidden_coarse)))
         
     | 
| 70 | 
         
            -
                    out_fine = self.O4(F.relu(self.O3(hidden_fine)))
         
     | 
| 71 | 
         
            -
             
     | 
| 72 | 
         
            -
                    return out_coarse, out_fine, hidden
         
     | 
| 73 | 
         
            -
                
         
     | 
| 74 | 
         
            -
                    
         
     | 
| 75 | 
         
            -
                def generate(self, seq_len):
         
     | 
| 76 | 
         
            -
                    with torch.no_grad():
         
     | 
| 77 | 
         
            -
                        # First split up the biases for the gates 
         
     | 
| 78 | 
         
            -
                        b_coarse_u, b_fine_u = torch.split(self.bias_u, self.split_size)
         
     | 
| 79 | 
         
            -
                        b_coarse_r, b_fine_r = torch.split(self.bias_r, self.split_size)
         
     | 
| 80 | 
         
            -
                        b_coarse_e, b_fine_e = torch.split(self.bias_e, self.split_size)
         
     | 
| 81 | 
         
            -
             
     | 
| 82 | 
         
            -
                        # Lists for the two output seqs
         
     | 
| 83 | 
         
            -
                        c_outputs, f_outputs = [], []
         
     | 
| 84 | 
         
            -
             
     | 
| 85 | 
         
            -
                        # Some initial inputs
         
     | 
| 86 | 
         
            -
                        out_coarse = torch.LongTensor([0]).cuda()
         
     | 
| 87 | 
         
            -
                        out_fine = torch.LongTensor([0]).cuda()
         
     | 
| 88 | 
         
            -
             
     | 
| 89 | 
         
            -
                        # We'll meed a hidden state
         
     | 
| 90 | 
         
            -
                        hidden = self.init_hidden()
         
     | 
| 91 | 
         
            -
             
     | 
| 92 | 
         
            -
                        # Need a clock for display
         
     | 
| 93 | 
         
            -
                        start = time.time()
         
     | 
| 94 | 
         
            -
             
     | 
| 95 | 
         
            -
                        # Loop for generation
         
     | 
| 96 | 
         
            -
                        for i in range(seq_len) :
         
     | 
| 97 | 
         
            -
             
     | 
| 98 | 
         
            -
                            # Split into two hidden states
         
     | 
| 99 | 
         
            -
                            hidden_coarse, hidden_fine = \
         
     | 
| 100 | 
         
            -
                                torch.split(hidden, self.split_size, dim=1)
         
     | 
| 101 | 
         
            -
             
     | 
| 102 | 
         
            -
                            # Scale and concat previous predictions
         
     | 
| 103 | 
         
            -
                            out_coarse = out_coarse.unsqueeze(0).float() / 127.5 - 1.
         
     | 
| 104 | 
         
            -
                            out_fine = out_fine.unsqueeze(0).float() / 127.5 - 1.
         
     | 
| 105 | 
         
            -
                            prev_outputs = torch.cat([out_coarse, out_fine], dim=1)
         
     | 
| 106 | 
         
            -
             
     | 
| 107 | 
         
            -
                            # Project input 
         
     | 
| 108 | 
         
            -
                            coarse_input_proj = self.I_coarse(prev_outputs)
         
     | 
| 109 | 
         
            -
                            I_coarse_u, I_coarse_r, I_coarse_e = \
         
     | 
| 110 | 
         
            -
                                torch.split(coarse_input_proj, self.split_size, dim=1)
         
     | 
| 111 | 
         
            -
             
     | 
| 112 | 
         
            -
                            # Project hidden state and split 6 ways
         
     | 
| 113 | 
         
            -
                            R_hidden = self.R(hidden)
         
     | 
| 114 | 
         
            -
                            R_coarse_u , R_fine_u, \
         
     | 
| 115 | 
         
            -
                            R_coarse_r, R_fine_r, \
         
     | 
| 116 | 
         
            -
                            R_coarse_e, R_fine_e = torch.split(R_hidden, self.split_size, dim=1)
         
     | 
| 117 | 
         
            -
             
     | 
| 118 | 
         
            -
                            # Compute the coarse gates
         
     | 
| 119 | 
         
            -
                            u = F.sigmoid(R_coarse_u + I_coarse_u + b_coarse_u)
         
     | 
| 120 | 
         
            -
                            r = F.sigmoid(R_coarse_r + I_coarse_r + b_coarse_r)
         
     | 
| 121 | 
         
            -
                            e = F.tanh(r * R_coarse_e + I_coarse_e + b_coarse_e)
         
     | 
| 122 | 
         
            -
                            hidden_coarse = u * hidden_coarse + (1. - u) * e
         
     | 
| 123 | 
         
            -
             
     | 
| 124 | 
         
            -
                            # Compute the coarse output
         
     | 
| 125 | 
         
            -
                            out_coarse = self.O2(F.relu(self.O1(hidden_coarse)))
         
     | 
| 126 | 
         
            -
                            posterior = F.softmax(out_coarse, dim=1)
         
     | 
| 127 | 
         
            -
                            distrib = torch.distributions.Categorical(posterior)
         
     | 
| 128 | 
         
            -
                            out_coarse = distrib.sample()
         
     | 
| 129 | 
         
            -
                            c_outputs.append(out_coarse)
         
     | 
| 130 | 
         
            -
             
     | 
| 131 | 
         
            -
                            # Project the [prev outputs and predicted coarse sample]
         
     | 
| 132 | 
         
            -
                            coarse_pred = out_coarse.float() / 127.5 - 1.
         
     | 
| 133 | 
         
            -
                            fine_input = torch.cat([prev_outputs, coarse_pred.unsqueeze(0)], dim=1)
         
     | 
| 134 | 
         
            -
                            fine_input_proj = self.I_fine(fine_input)
         
     | 
| 135 | 
         
            -
                            I_fine_u, I_fine_r, I_fine_e = \
         
     | 
| 136 | 
         
            -
                                torch.split(fine_input_proj, self.split_size, dim=1)
         
     | 
| 137 | 
         
            -
             
     | 
| 138 | 
         
            -
                            # Compute the fine gates
         
     | 
| 139 | 
         
            -
                            u = F.sigmoid(R_fine_u + I_fine_u + b_fine_u)
         
     | 
| 140 | 
         
            -
                            r = F.sigmoid(R_fine_r + I_fine_r + b_fine_r)
         
     | 
| 141 | 
         
            -
                            e = F.tanh(r * R_fine_e + I_fine_e + b_fine_e)
         
     | 
| 142 | 
         
            -
                            hidden_fine = u * hidden_fine + (1. - u) * e
         
     | 
| 143 | 
         
            -
             
     | 
| 144 | 
         
            -
                            # Compute the fine output
         
     | 
| 145 | 
         
            -
                            out_fine = self.O4(F.relu(self.O3(hidden_fine)))
         
     | 
| 146 | 
         
            -
                            posterior = F.softmax(out_fine, dim=1)
         
     | 
| 147 | 
         
            -
                            distrib = torch.distributions.Categorical(posterior)
         
     | 
| 148 | 
         
            -
                            out_fine = distrib.sample()
         
     | 
| 149 | 
         
            -
                            f_outputs.append(out_fine)
         
     | 
| 150 | 
         
            -
             
     | 
| 151 | 
         
            -
                            # Put the hidden state back together
         
     | 
| 152 | 
         
            -
                            hidden = torch.cat([hidden_coarse, hidden_fine], dim=1)
         
     | 
| 153 | 
         
            -
             
     | 
| 154 | 
         
            -
                            # Display progress
         
     | 
| 155 | 
         
            -
                            speed = (i + 1) / (time.time() - start)
         
     | 
| 156 | 
         
            -
                            stream('Gen: %i/%i -- Speed: %i',  (i + 1, seq_len, speed))
         
     | 
| 157 | 
         
            -
             
     | 
| 158 | 
         
            -
                        coarse = torch.stack(c_outputs).squeeze(1).cpu().data.numpy()
         
     | 
| 159 | 
         
            -
                        fine = torch.stack(f_outputs).squeeze(1).cpu().data.numpy()        
         
     | 
| 160 | 
         
            -
                        output = combine_signal(coarse, fine)
         
     | 
| 161 | 
         
            -
                    
         
     | 
| 162 | 
         
            -
                    return output, coarse, fine
         
     | 
| 163 | 
         
            -
             
     | 
| 164 | 
         
            -
                def init_hidden(self, batch_size=1) :
         
     | 
| 165 | 
         
            -
                    return torch.zeros(batch_size, self.hidden_size).cuda()
         
     | 
| 166 | 
         
            -
                
         
     | 
| 167 | 
         
            -
                def num_params(self) :
         
     | 
| 168 | 
         
            -
                    parameters = filter(lambda p: p.requires_grad, self.parameters())
         
     | 
| 169 | 
         
            -
                    parameters = sum([np.prod(p.size()) for p in parameters]) / 1_000_000
         
     | 
| 170 | 
         
            -
                    print('Trainable Parameters: %.3f million' % parameters)
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         |