Higobeatz committed on
Commit
853cf78
·
1 Parent(s): 1e95c1f

openvoice plugin

Browse files
.ipynb_checkpoints/LICENSE-checkpoint ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2
+
3
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
4
+
5
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - myshell
6
+ - speech-to-speech
7
+ ---
8
+ <!-- might put a [width=2000 * height=xxx] img here, this size best fits git page
9
+ <img src="resources\cover.png"> -->
10
+ <img src="resources/dreamvoice.png">
11
+
12
+ # DreamVoice: Text-guided Voice Conversion
13
+
14
+ --------------------
15
+
16
+ ## Introduction
17
+
18
+ DreamVoice is an innovative approach to voice conversion (VC) that leverages text-guided generation to create personalized and versatile voice experiences.
19
+ Unlike traditional VC methods, which require a target recording during inference, DreamVoice introduces a more intuitive solution by allowing users to specify desired voice timbres through text prompts.
20
+
21
+ For more details, please check our Interspeech paper: [DreamVoice](https://arxiv.org/abs/2406.16314)
22
+
23
+ To listen to demos and download the dataset, please visit the DreamVoice homepage: [Homepage](https://haidog-yaqub.github.io/dreamvoice_demo/)
24
+
25
+
26
+ # Model Usage
27
+
28
+ To load the models, you need to install packages:
29
+
30
+ ```
31
+ pip install -r requirements.txt
32
+ ```
33
+
34
+ Then you can use the model with the following code:
35
+
36
+ - NEW! DreamVoice Plugin for OpenVoice (DreamVG + [OpenVoice](https://github.com/myshell-ai/OpenVoice))
37
+
38
+ ```python
39
+ import torch
40
+ from dreamvoice import DreamVoice_Plugin
41
+ from dreamvoice.openvoice_utils import se_extractor
42
+ from openvoice.api import ToneColorConverter
43
+
44
+ # init dreamvoice
45
+ dreamvoice = DreamVoice_Plugin(device='cuda')
46
+
47
+ # init openvoice
48
+ ckpt_converter = 'checkpoints_v2/converter'
49
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
50
+ openvoice = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
51
+ openvoice.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
52
+
53
+ # generate speaker
54
+ prompt = 'cute female girl voice'
55
+ target_se = dreamvoice.gen_spk(prompt)
56
+ target_se = target_se.unsqueeze(-1)
57
+
58
+ # content source
59
+ source_path = 'examples/test2.wav'
60
+ source_se = se_extractor(source_path, openvoice).to(device)
61
+
62
+ # voice conversion
63
+ encode_message = "@MyShell"
64
+ openvoice.convert(
65
+ audio_src_path=source_path,
66
+ src_se=source_se,
67
+ tgt_se=target_se,
68
+ output_path='output.wav',
69
+ message=encode_message)
70
+ ```
71
+
72
+ - DreamVoice Plugin for Diffusion-based VC model (DreamVG + DiffVC)
73
+
74
+ ```python
75
+ from dreamvoice import DreamVoice
76
+
77
+ # Initialize DreamVoice in plugin mode with CUDA device
78
+ dreamvoice = DreamVoice(mode='plugin', device='cuda')
79
+ # Description of the target voice
80
+ prompt = 'young female voice, sounds young and cute'
81
+ # Provide the path to the content audio and generate the converted audio
82
+ gen_audio, sr = dreamvoice.genvc('examples/test1.wav', prompt)
83
+ # Save the converted audio
84
+ dreamvoice.save_audio('gen1.wav', gen_audio, sr)
85
+
86
+ # Save the speaker embedding if you like the generated voice
87
+ dreamvoice.save_spk_embed('voice_stash1.pt')
88
+ # Load the saved speaker embedding
89
+ dreamvoice.load_spk_embed('voice_stash1.pt')
90
+ # Use the saved speaker embedding for another audio sample
91
+ gen_audio2, sr = dreamvoice.simplevc('examples/test2.wav', use_spk_cache=True)
92
+ dreamvoice.save_audio('gen2.wav', gen_audio2, sr)
93
+ ```
94
+
95
+ - Diffusion-based End-to-end model (DreamVC)
96
+
97
+ ```python
98
+ from dreamvoice import DreamVoice
99
+
100
+ # Initialize DreamVoice in end-to-end mode with CUDA device
101
+ dreamvoice = DreamVoice(mode='end2end', device='cuda')
102
+ # Provide the path to the content audio and generate the converted audio
103
+ gen_end2end, sr = dreamvoice.genvc('examples/test1.wav', prompt)
104
+ # Save the converted audio
105
+ dreamvoice.save_audio('gen_end2end.wav', gen_end2end, sr)
106
+
107
+ # Note: End-to-end mode does not support saving speaker embeddings
108
+ # To use a voice generated in end-to-end mode, switch back to plugin mode
109
+ # and extract the speaker embedding from the generated audio
110
+ # Switch back to plugin mode
111
+ dreamvoice = DreamVoice(mode='plugin', device='cuda')
112
+ # Load the speaker audio from the previously generated file
113
+ gen_end2end2, sr = dreamvoice.simplevc('examples/test2.wav', speaker_audio='gen_end2end.wav')
114
+ # Save the new converted audio
115
+ dreamvoice.save_audio('gen_end2end2.wav', gen_end2end2, sr)
116
+ ```
117
+
118
+ - One-shot Voice Conversion (DiffVC)
119
+
120
+ ```python
121
+ from dreamvoice import DreamVoice
122
+
123
+ # Plugin mode can be used for traditional one-shot voice conversion
124
+ dreamvoice = DreamVoice(mode='plugin', device='cuda')
125
+ # Generate audio using traditional one-shot voice conversion
126
+ gen_tradition, sr = dreamvoice.simplevc('examples/test1.wav', speaker_audio='examples/speaker.wav')
127
+ # Save the converted audio
128
+ dreamvoice.save_audio('gen_tradition.wav', gen_tradition, sr)
129
+ ```
130
+
131
+ ## Reference
132
+
133
+ If you find the code useful for your research, please consider citing:
134
+
135
+ ```bibtex
136
+ @article{hai2024dreamvoice,
137
+ title={DreamVoice: Text-Guided Voice Conversion},
138
+ author={Hai, Jiarui and Thakkar, Karan and Wang, Helin and Qin, Zengyi and Elhilali, Mounya},
139
+ journal={arXiv preprint arXiv:2406.16314},
140
+ year={2024}
141
+ }
142
+ ```
.ipynb_checkpoints/example-checkpoint.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dreamvoice import DreamVoice
2
+
3
+ # Plugin mode (DreamVG + ReDiffVC)
4
+ # Initialize DreamVoice in plugin mode with CUDA device
5
+ dreamvoice = DreamVoice(mode='plugin', device='cuda')
6
+ # Description of the target voice
7
+ prompt = 'young female voice, sounds young and cute'
8
+ # Provide the path to the content audio and generate the converted audio
9
+ gen_audio, sr = dreamvoice.genvc('examples/test1.wav', prompt)
10
+ # Save the converted audio
11
+ dreamvoice.save_audio('gen1.wav', gen_audio, sr)
12
+
13
+ # Save the speaker embedding if you like the generated voice
14
+ dreamvoice.save_spk_embed('voice_stash1.pt')
15
+ # Load the saved speaker embedding
16
+ dreamvoice.load_spk_embed('voice_stash1.pt')
17
+ # Use the saved speaker embedding for another audio sample
18
+ gen_audio2, sr = dreamvoice.simplevc('examples/test2.wav', use_spk_cache=True)
19
+ dreamvoice.save_audio('gen2.wav', gen_audio2, sr)
20
+
21
+
22
+ # End-to-end mode (DreamVC)
23
+ # Initialize DreamVoice in end-to-end mode with CUDA device
24
+ dreamvoice = DreamVoice(mode='end2end', device='cuda')
25
+ # Provide the path to the content audio and generate the converted audio
26
+ gen_end2end, sr = dreamvoice.genvc('examples/test1.wav', prompt)
27
+ # Save the converted audio
28
+ dreamvoice.save_audio('gen_end2end.wav', gen_end2end, sr)
29
+
30
+ # Note: End-to-end mode does not support saving speaker embeddings
31
+ # To use a voice generated in end-to-end mode, switch back to plugin mode
32
+ # and extract the speaker embedding from the generated audio
33
+ # Switch back to plugin mode
34
+ dreamvoice = DreamVoice(mode='plugin', device='cuda')
35
+ # Load the speaker audio from the previously generated file
36
+ gen_end2end2, sr = dreamvoice.simplevc('examples/test2.wav', speaker_audio='gen_end2end.wav')
37
+ # Save the new converted audio
38
+ dreamvoice.save_audio('gen_end2end2.wav', gen_end2end2, sr)
39
+
40
+
41
+ # Traditional VC
42
+ # Plugin mode can be used for traditional one-shot voice conversion
43
+ dreamvoice = DreamVoice(mode='plugin', device='cuda')
44
+ # Generate audio using traditional one-shot voice conversion
45
+ gen_tradition, sr = dreamvoice.simplevc('examples/test1.wav', speaker_audio='examples/speaker.wav')
46
+ # Save the converted audio
47
+ dreamvoice.save_audio('gen_tradition.wav', gen_tradition, sr)
.ipynb_checkpoints/openvoice_example-checkpoint.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from dreamvoice import DreamVoice_Plugin
3
+ from dreamvoice.openvoice_utils import se_extractor
4
+ from openvoice.api import ToneColorConverter
5
+
6
+
7
+ # init dreamvoice
8
+ dreamvoice = DreamVoice_Plugin(device='cuda')
9
+
10
+ # init openvoice
11
+ ckpt_converter = 'checkpoints_v2/converter'
12
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
13
+ openvoice = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
14
+ openvoice.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
15
+
16
+ # generate speaker
17
+ prompt = 'cute female girl voice'
18
+ target_se = dreamvoice.gen_spk(prompt)
19
+ target_se = target_se.unsqueeze(-1)
20
+
21
+ # content source
22
+ source_path = 'examples/test2.wav'
23
+ source_se = se_extractor(source_path, openvoice).to(device)
24
+
25
+ # voice conversion
26
+ encode_message = "@MyShell"
27
+ openvoice.convert(
28
+ audio_src_path=source_path,
29
+ src_se=source_se,
30
+ tgt_se=target_se,
31
+ output_path='output.wav',
32
+ message=encode_message)
README.md CHANGED
@@ -20,10 +20,7 @@ Unlike traditional VC methods, which require a target recording during inference
20
 
21
  For more details, please check our Interspeech paper: [DreamVoice](https://arxiv.org/abs/2406.16314)
22
 
23
-
24
- ## Demo
25
-
26
- 🎵 Listen to [examples](https://haidog-yaqub.github.io/dreamvoice_demo/)
27
 
28
 
29
  # Model Usage
@@ -36,7 +33,43 @@ pip install -r requirements.txt
36
 
37
  Then you can use the model with the following code:
38
 
39
- - Plugin mode (DreamVG + DiffVC)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  ```python
42
  from dreamvoice import DreamVoice
@@ -58,7 +91,8 @@ dreamvoice.load_spk_embed('voice_stash1.pt')
58
  gen_audio2, sr = dreamvoice.simplevc('examples/test2.wav', use_spk_cache=True)
59
  dreamvoice.save_audio('gen2.wav', gen_audio2, sr)
60
  ```
61
- - End-to-end mode (DreamVC)
 
62
 
63
  ```python
64
  from dreamvoice import DreamVoice
 
20
 
21
  For more details, please check our Interspeech paper: [DreamVoice](https://arxiv.org/abs/2406.16314)
22
 
23
+ To listen to demos and download the dataset, please visit the DreamVoice homepage: [Homepage](https://haidog-yaqub.github.io/dreamvoice_demo/)
 
 
 
24
 
25
 
26
  # Model Usage
 
33
 
34
  Then you can use the model with the following code:
35
 
36
+ - NEW! DreamVoice Plugin for OpenVoice (DreamVG + [OpenVoice](https://github.com/myshell-ai/OpenVoice))
37
+
38
+ ```python
39
+ import torch
40
+ from dreamvoice import DreamVoice_Plugin
41
+ from dreamvoice.openvoice_utils import se_extractor
42
+ from openvoice.api import ToneColorConverter
43
+
44
+ # init dreamvoice
45
+ dreamvoice = DreamVoice_Plugin(device='cuda')
46
+
47
+ # init openvoice
48
+ ckpt_converter = 'checkpoints_v2/converter'
49
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
50
+ openvoice = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
51
+ openvoice.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
52
+
53
+ # generate speaker
54
+ prompt = 'cute female girl voice'
55
+ target_se = dreamvoice.gen_spk(prompt)
56
+ target_se = target_se.unsqueeze(-1)
57
+
58
+ # content source
59
+ source_path = 'examples/test2.wav'
60
+ source_se = se_extractor(source_path, openvoice).to(device)
61
+
62
+ # voice conversion
63
+ encode_message = "@MyShell"
64
+ openvoice.convert(
65
+ audio_src_path=source_path,
66
+ src_se=source_se,
67
+ tgt_se=target_se,
68
+ output_path='output.wav',
69
+ message=encode_message)
70
+ ```
71
+
72
+ - DreamVoice Plugin for Diffusion-based VC model (DreamVG + DiffVC)
73
 
74
  ```python
75
  from dreamvoice import DreamVoice
 
91
  gen_audio2, sr = dreamvoice.simplevc('examples/test2.wav', use_spk_cache=True)
92
  dreamvoice.save_audio('gen2.wav', gen_audio2, sr)
93
  ```
94
+
95
+ - Diffusion-based End-to-end model (DreamVC)
96
 
97
  ```python
98
  from dreamvoice import DreamVoice
openvoice_example.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from dreamvoice import DreamVoice_Plugin
3
+ from dreamvoice.openvoice_utils import se_extractor
4
+ from openvoice.api import ToneColorConverter
5
+
6
+
7
+ # init dreamvoice
8
+ dreamvoice = DreamVoice_Plugin(device='cuda')
9
+
10
+ # init openvoice
11
+ ckpt_converter = 'checkpoints_v2/converter'
12
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
13
+ openvoice = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
14
+ openvoice.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
15
+
16
+ # generate speaker
17
+ prompt = 'cute female girl voice'
18
+ target_se = dreamvoice.gen_spk(prompt)
19
+ target_se = target_se.unsqueeze(-1)
20
+
21
+ # content source
22
+ source_path = 'examples/test2.wav'
23
+ source_se = se_extractor(source_path, openvoice).to(device)
24
+
25
+ # voice conversion
26
+ encode_message = "@MyShell"
27
+ openvoice.convert(
28
+ audio_src_path=source_path,
29
+ src_se=source_se,
30
+ tgt_se=target_se,
31
+ output_path='output.wav',
32
+ message=encode_message)
output.wav ADDED
Binary file (213 kB). View file