Higobeatz committed on
Commit 20634a1 · verified · 1 Parent(s): f8f61a2

Delete dreamvoice/src/feats/.ipynb_checkpoints

dreamvoice/src/feats/.ipynb_checkpoints/contentvec-checkpoint.py DELETED
@@ -1,42 +0,0 @@
- import torch
- import librosa
- from fairseq import checkpoint_utils
- import torch.nn.functional as F
-
-
- def get_model(vec_path):
-     print("load model(s) from {}".format(vec_path))
-     models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
-         [vec_path],
-         suffix="",
-     )
-     model = models[0]
-     model.eval()
-     return model
-
-
- @torch.no_grad()
- def get_content(hmodel, wav_16k_tensor, device='cuda', layer=12):
-     # print(layer)
-     wav_16k_tensor = wav_16k_tensor.to(device)
-     # so that the output shape will be len(audio//320)
-     wav_16k_tensor = F.pad(wav_16k_tensor, ((400 - 320) // 2, (400 - 320) // 2))
-     feats = wav_16k_tensor
-     padding_mask = torch.BoolTensor(feats.shape).fill_(False)
-     inputs = {
-         "source": feats.to(wav_16k_tensor.device),
-         "padding_mask": padding_mask.to(wav_16k_tensor.device),
-         "output_layer": layer
-     }
-     logits = hmodel.extract_features(**inputs)[0]
-     # feats = hmodel.final_proj(logits[0])
-     return logits
-
-
- if __name__ == '__main__':
-     audio, sr = librosa.load('test.wav', sr=16000)
-     audio = audio[:100*320]
-     model = get_model('../../ckpts/checkpoint_best_legacy_500.pt')
-     model = model.cuda()
-     content = get_content(model, torch.tensor([audio]))
-     print(content)
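For reference, the symmetric padding of (400 - 320) / 2 samples in `get_content` lines the 16 kHz waveform up with HuBERT's convolutional front end (400-sample receptive field, 320-sample hop), so an input of N samples yields roughly N // 320 frames. A minimal sanity check of that framing, using only a dummy tensor and no checkpoint (this sketch is not part of the deleted file):

# Illustrative only, not part of the deleted file: sanity-check the 400/320
# framing used in get_content above with a dummy 16 kHz waveform.
import torch
import torch.nn.functional as F

wav = torch.randn(1, 100 * 320)            # 2 s at 16 kHz -> expect ~100 frames
pad = (400 - 320) // 2                     # same symmetric padding as get_content
padded = F.pad(wav, (pad, pad))
n_frames = (padded.shape[-1] - 400) // 320 + 1   # approximate conv front-end output length
print(n_frames)                            # 100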
dreamvoice/src/feats/.ipynb_checkpoints/contentvec_hf-checkpoint.py DELETED
@@ -1,40 +0,0 @@
- from transformers import HubertModel
- import torch.nn as nn
- import torch
- import torch.nn.functional as F
- import librosa
-
-
- class HubertModelWithFinalProj(HubertModel):
-     def __init__(self, config):
-         super().__init__(config)
-
-         # The final projection layer is only used for backward compatibility.
-         # Following https://github.com/auspicious3000/contentvec/issues/6
-         # Remove this layer is necessary to achieve the desired outcome.
-         self.final_proj = nn.Linear(config.hidden_size, config.classifier_proj_size)
-
-
- def get_content_model(config='lengyue233/content-vec-best'):
-     model = HubertModelWithFinalProj.from_pretrained(config)
-     model.eval()
-     return model
-
-
- @torch.no_grad()
- def get_content(model, wav_16k_tensor, device='cuda'):
-     # print(layer)
-     wav_16k_tensor = wav_16k_tensor.to(device)
-     # so that the output shape will be len(audio//320)
-     wav_16k_tensor = F.pad(wav_16k_tensor, ((400 - 320) // 2, (400 - 320) // 2))
-     logits = model(wav_16k_tensor)['last_hidden_state']
-     return logits
-
-
- if __name__ == '__main__':
-     model = get_content_model().cuda()
-     audio, sr = librosa.load('test.wav', sr=16000)
-     audio = audio[:100*320]
-     audio = torch.tensor([audio])
-     content = get_content(model, audio, 'cuda')
-     print(content)
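The Hugging Face port above returns `last_hidden_state`, whereas the fairseq version exposes an `output_layer` argument. If an intermediate layer were needed from the HF model, `output_hidden_states=True` is one way to get it. A rough sketch with a dummy input, assuming the `lengyue233/content-vec-best` weights can be fetched (this is not part of the deleted file):

# Rough sketch, not part of the deleted file: grab an intermediate transformer
# layer from the HF model, analogous to output_layer=12 in the fairseq version.
import torch

model = get_content_model()                # helper defined in the deleted file above
wav = torch.randn(1, 16000)                # 1 s of dummy 16 kHz audio
with torch.no_grad():
    out = model(wav, output_hidden_states=True)
layer_12 = out.hidden_states[12]           # hidden_states[0] is the pre-encoder embedding
print(layer_12.shape)                      # (1, n_frames, hidden_size)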
dreamvoice/src/feats/.ipynb_checkpoints/hubert_model-checkpoint.py DELETED
@@ -1,24 +0,0 @@
- import torch, torchaudio
- from .hubert.hubert import HubertSoft
- from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
- import librosa
-
-
- def get_soft_model(model_path):
-     hubert = HubertSoft()
-     # Load checkpoint (either hubert_soft or hubert_discrete)
-     # hubert = torch.hub.load("bshall/hubert:main", "hubert_soft", trust_repo=True)
-     checkpoint = torch.load(model_path)
-     consume_prefix_in_state_dict_if_present(checkpoint["hubert"], "module.")
-     hubert.load_state_dict(checkpoint["hubert"])
-     hubert.eval()
-     return hubert
-
-
- @torch.no_grad()
- def get_hubert_soft_content(hmodel, wav_16k_tensor, device='cuda'):
-     wav_16k_tensor = wav_16k_tensor.to(device).unsqueeze(1)
-     # print(wav_16k_tensor.shape)
-     units = hmodel.units(wav_16k_tensor)
-     # print(units.shape)
-     return units.cpu()
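Unlike the two ContentVec files, hubert_model-checkpoint.py carried no `__main__` demo. A hypothetical usage sketch of the two helpers, where the checkpoint path and wav file are placeholders rather than files tracked in this repo:

# Hypothetical usage of the helpers above; 'path/to/hubert_soft.pt' and
# 'test.wav' are placeholders, not files tracked in this repo.
import librosa
import torch

model = get_soft_model('path/to/hubert_soft.pt').cuda()
audio, sr = librosa.load('test.wav', sr=16000)
units = get_hubert_soft_content(model, torch.tensor([audio]))
print(units.shape)                         # (1, n_frames, unit_dim)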
dreamvoice/src/feats/.ipynb_checkpoints/test-checkpoint.py DELETED
@@ -1,22 +0,0 @@
- import torch, torchaudio
- from hubert.hubert import HubertSoft
- from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
- import librosa
-
-
- def get_soft_model(model_path):
-     hubert = HubertSoft()
-     # Load checkpoint (either hubert_soft or hubert_discrete)
-     # hubert = torch.hub.load("bshall/hubert:main", "hubert_soft", trust_repo=True)
-     checkpoint = torch.load(model_path)
-     consume_prefix_in_state_dict_if_present(checkpoint["hubert"], "module.")
-     hubert.load_state_dict(checkpoint["hubert"])
-     hubert.eval()
-     return model
-
-
- @torch.no_grad()
- def get_hubert_soft_content(hmodel, wav_16k_tensor, device='cuda'):
-     wav_16k_tensor = wav_16k_tensor.to(device)
-     units = hmodel.units(wav_16k_tensor)
-     return units.cpu()