SohomToom committed on
Commit 0c5c249 · verified · 1 Parent(s): 24c9e51

Update app.py

Files changed (1)
  app.py +48 -23
app.py CHANGED
@@ -1,36 +1,61 @@
-import os
-os.environ["NUMBA_DISABLE_CACHE"] = "1"
 import gradio as gr
 import os
+from openvoice.api import ToneColorConverter
+from openvoice import se_extractor
+from inference import infer_tool
 import torch
+import time
+import uuid
 
-# Add openvoice path
-import sys
-sys.path.append("openvoice")
-
-from openvoice.api import ToneColorConverter
-from openvoice.inference import voice_conversion
-
-# Set up paths
-ckpt_converter = './checkpoints/converter'
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-converter = ToneColorConverter(f"{ckpt_converter}/config.json", device=device)
-converter.load_ckpt(f"{ckpt_converter}/converter.ckpt")
-
-def convert_voice(audio_file, text_prompt):
-    output_path = "./results/output.wav"
-    # You must clone reference audio using clone.sh or similar step in Dockerfile
-    voice_conversion(converter, audio_file.name, text_prompt, output_path, device)
-    return output_path
-
-iface = gr.Interface(
-    fn=convert_voice,
+# Set model paths
+ckpt_converter = "checkpoints/converter"
+output_dir = "outputs"
+os.makedirs(output_dir, exist_ok=True)
+
+# Initialize converter
+tone_color_converter = ToneColorConverter(ckpt_converter)
+
+# Load base speaker embedding for style transfer
+ref_speaker_embed = None
+
+def clone_and_speak(text, speaker_wav):
+    if not speaker_wav:
+        return "Please upload a reference .wav file."
+
+    # Generate a unique filename
+    timestamp = str(int(time.time()))
+    base_name = f"output_{timestamp}_{uuid.uuid4().hex[:6]}"
+    output_wav = os.path.join(output_dir, f"{base_name}.wav")
+
+    # Extract style from uploaded speaker voice
+    global ref_speaker_embed
+    ref_speaker_embed = se_extractor.get_se(speaker_wav, tone_color_converter)
+
+    # Generate speech using base model (internal prompt and sampling)
+    tone_color_converter.infer(
+        text=text,
+        speaker_id="openvoice",
+        language="en",
+        ref_speaker=speaker_wav,
+        ref_embed=ref_speaker_embed,
+        output_path=output_wav,
+        top_k=10,
+        temperature=0.3
+    )
+
+    return output_wav
+
+
+demo = gr.Interface(
+    fn=clone_and_speak,
     inputs=[
-        gr.Audio(type="filepath", label="Input Voice (WAV)"),
-        gr.Textbox(label="Prompt (e.g., 'Speak in a cheerful tone')"),
+        gr.Textbox(label="Enter Text"),
+        gr.Audio(type="filepath", label="Upload a Reference Voice (.wav)")
     ],
-    outputs=gr.Audio(label="Converted Voice")
+    outputs=gr.Audio(label="Synthesized Output"),
+    title="Text to Voice using OpenVoice",
+    description="Clone any voice (English) and generate speech using OpenVoice on CPU.",
 )
 
-iface.launch()
+if __name__ == "__main__":
+    demo.launch()
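Note: the previous revision initialized the converter from a config file and then loaded the weights separately, which matches the upstream OpenVoice API (ToneColorConverter takes a config.json path plus a device string, with the checkpoint loaded via load_ckpt). The new single-argument call ToneColorConverter(ckpt_converter) passes a bare directory and may fail at startup. A minimal sketch of the upstream-style initialization, assuming the same checkpoints/converter layout used in this diff:

import torch
from openvoice.api import ToneColorConverter

# Upstream-style initialization (mirrors the lines removed above):
# the constructor takes a config.json path and a device string, and
# the weights are loaded separately from converter.ckpt.
ckpt_converter = "checkpoints/converter"
device = "cuda" if torch.cuda.is_available() else "cpu"

tone_color_converter = ToneColorConverter(f"{ckpt_converter}/config.json", device=device)
tone_color_converter.load_ckpt(f"{ckpt_converter}/converter.ckpt")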
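In the upstream repo, se_extractor.get_se returns an (embedding, audio_name) pair rather than a bare embedding, so ref_speaker_embed above would end up holding a tuple. A sketch of the upstream call shape, assuming the converter from the previous sketch; the target_dir value is illustrative:

from openvoice import se_extractor

# get_se slices the reference clip, encodes it, and returns a pair:
# the tone-color embedding plus a derived audio name used for caching.
target_se, audio_name = se_extractor.get_se(
    speaker_wav,             # path to the uploaded reference .wav
    tone_color_converter,
    target_dir="processed",  # illustrative cache directory
    vad=True,                # voice-activity-based slicing
)

A related point: ref_speaker_embed is module-global and rewritten on every request, so concurrent Gradio users can overwrite each other's embedding; keeping the result local to clone_and_speak (as target_se is here) avoids the race.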
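The tone_color_converter.infer(...) call above, with text, speaker_id, and sampling parameters, does not correspond to the converter API in the upstream OpenVoice v1 repo, where synthesis is a two-step flow: a base speaker TTS renders the text to speech, and the converter then re-colors that audio with the reference embedding. A hedged sketch of that flow, reusing names from the function above and from the earlier sketches; the checkpoints/base_speakers/EN layout is the upstream default and may differ in this Space:

import os
import torch
from openvoice.api import BaseSpeakerTTS

# Step 1: the base TTS renders neutral English speech for the input text.
base_ckpt = "checkpoints/base_speakers/EN"  # assumed upstream layout
base_tts = BaseSpeakerTTS(f"{base_ckpt}/config.json", device=device)
base_tts.load_ckpt(f"{base_ckpt}/checkpoint.pth")
source_se = torch.load(f"{base_ckpt}/en_default_se.pth").to(device)

tmp_wav = os.path.join(output_dir, "tmp.wav")
base_tts.tts(text, tmp_wav, speaker="default", language="English", speed=1.0)

# Step 2: the converter swaps the tone color to the reference speaker.
tone_color_converter.convert(
    audio_src_path=tmp_wav,
    src_se=source_se,
    tgt_se=target_se,  # embedding from se_extractor.get_se above
    output_path=output_wav,
)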
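Once the Space is running, the endpoint can be smoke-tested programmatically. A minimal sketch using gradio_client, where the Space id and the reference clip are placeholders (on older gradio_client versions, a plain filepath string is passed instead of handle_file):

from gradio_client import Client, handle_file

client = Client("SohomToom/<space-name>")  # placeholder Space id
result = client.predict(
    "Hello from OpenVoice.",    # Enter Text
    handle_file("sample.wav"),  # Upload a Reference Voice (.wav)
    api_name="/predict",
)
print(result)  # local path to the synthesized audio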