SohomToom committed · verified
Commit a59b93c · 1 Parent(s): 251c251

Update app.py

Files changed (1)
  1. app.py (+12, -18)
app.py CHANGED
@@ -1,34 +1,31 @@
-
 import os
+import gradio as gr
+from openvoice.api import ToneColorConverter
+from openvoice import se_extractor
+import torch
+import time
+import uuid
 
-# Fixes for HF Hub
+# Environment fixes for HF Spaces
 os.environ["HF_HOME"] = "/tmp/huggingface"
 os.environ["HF_HUB_CACHE"] = "/tmp/huggingface"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
-
-# Fixes for matplotlib and fontconfig
 os.environ["MPLCONFIGDIR"] = "/tmp"
 os.environ["XDG_CACHE_HOME"] = "/tmp"
 os.environ["XDG_CONFIG_HOME"] = "/tmp"
 os.environ["NUMBA_DISABLE_CACHE"] = "1"
 os.makedirs("/tmp/huggingface", exist_ok=True)
 os.makedirs("/tmp/flagged", exist_ok=True)
-import gradio as gr
-from openvoice.api import ToneColorConverter
-from openvoice import se_extractor
-import torch
-import time
-import uuid
 
 # Set model paths
 ckpt_converter = "checkpoints/converter/config.json"
 output_dir = "/tmp/outputs"
 os.makedirs(output_dir, exist_ok=True)
 
-# Initialize converter
+# Initialize OpenVoice converter
 tone_color_converter = ToneColorConverter(ckpt_converter)
 
-# Load base speaker embedding for style transfer
+# Speaker embedding cache
 ref_speaker_embed = None
 
 def clone_and_speak(text, speaker_wav):
@@ -44,7 +41,7 @@ def clone_and_speak(text, speaker_wav):
     global ref_speaker_embed
     ref_speaker_embed = se_extractor.get_se(speaker_wav, tone_color_converter)
 
-    # Generate speech using base model (internal prompt and sampling)
+    # Generate speech using base model
     tone_color_converter.infer(
         text=text,
         speaker_id="openvoice",
@@ -58,7 +55,7 @@ def clone_and_speak(text, speaker_wav):
 
     return output_wav
 
-
+# Gradio interface (exposed as global `demo` for HF Spaces)
 demo = gr.Interface(
     fn=clone_and_speak,
     inputs=[
@@ -66,10 +63,7 @@ demo = gr.Interface(
         gr.Audio(type="filepath", label="Upload a Reference Voice (.wav)")
     ],
     outputs=gr.Audio(label="Synthesized Output"),
-    flagging_dir = "/tmp/flagged",
+    flagging_dir="/tmp/flagged",  # safe temporary dir
     title="Text to Voice using OpenVoice",
     description="Clone any voice (English) and generate speech using OpenVoice on CPU.",
 )
-
-if __name__ == "__main__":
-    demo.launch(share=True)
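
Note: with the `if __name__ == "__main__": demo.launch(share=True)` guard removed, the Space relies on Gradio picking up the global `demo` object, so running app.py directly no longer starts a server. A minimal local smoke test could look like the sketch below; the helper file name and the host/port values are assumptions for illustration, not part of this commit.

# run_local.py: hypothetical helper for testing outside HF Spaces, not part of this commit.
# It imports the global Gradio app defined in this commit's app.py and launches it.
from app import demo

if __name__ == "__main__":
    # The removed guard used share=True; a plain local launch is enough for a smoke test.
    demo.launch(server_name="127.0.0.1", server_port=7860)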