crackuser committed on
Commit
71d678c
·
verified ·
1 Parent(s): f61fc95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -86
app.py CHANGED
@@ -8,7 +8,6 @@ from contextlib import contextmanager
8
 
9
  warnings.filterwarnings("ignore")
10
  os.environ["COQUI_TOS_AGREED"] = "1"
11
-
12
  print("🚀 Starting Voice Cloning Studio...")
13
 
14
  @contextmanager
@@ -63,12 +62,25 @@ def load_whisper():
63
  return False
64
 
65
  def voice_to_voice_clone(reference_audio, input_audio, language="en"):
 
 
 
66
  try:
 
 
 
 
67
  if not reference_audio or not input_audio:
68
  return None, "❌ Please upload both reference and input audio files!"
 
 
69
  if not load_xtts_manual():
70
  return None, f"❌ XTTS loading failed!\nStatus: {MODEL_STATUS}"
 
 
71
  load_whisper()
 
 
72
  extracted_text = "Voice cloning demonstration."
73
  if WHISPER_MODEL:
74
  try:
@@ -79,8 +91,12 @@ def voice_to_voice_clone(reference_audio, input_audio, language="en"):
79
  print(f"βœ… Extracted: '{extracted_text[:100]}...'")
80
  except Exception as e:
81
  print(f"⚠️ Whisper error: {e}")
 
 
82
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
83
  output_path = tmp_file.name
 
 
84
  with patch_torch_load():
85
  TTS_MODEL.tts_to_file(
86
  text=extracted_text,
@@ -88,99 +104,68 @@ def voice_to_voice_clone(reference_audio, input_audio, language="en"):
88
  language=language,
89
  file_path=output_path
90
  )
 
 
91
  if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
92
- return output_path, f"""βœ… VOICE-TO-VOICE CLONING SUCCESS!
93
-
94
  πŸ“ Content: '{extracted_text[:150]}...'
95
  🎭 Device: {DEVICE}
96
  πŸ”§ Status: {MODEL_STATUS}
 
97
  """
 
 
98
  else:
99
  return None, "❌ Generated audio file is empty!"
 
100
  except Exception as e:
101
- return None, f"❌ Voice cloning error: {str(e)}\nModel: {MODEL_STATUS}"
 
 
102
 
103
- with gr.Blocks(title="Voice Cloning Studio") as demo:
104
- gr.HTML("""
105
- <div style="text-align: center; padding: 25px;">
106
- <h1>🎭 REAL Voice Cloning Studio</h1>
107
- <p>Status: Models load on first use</p>
108
- </div>
109
- """)
110
-
111
- with gr.Row():
112
- with gr.Column():
113
- reference_audio = gr.Audio(
114
- label="🎀 Reference Audio (Voice to Clone)",
115
- type="filepath",
116
- sources=["upload", "microphone"]
117
- )
118
- input_audio = gr.Audio(
119
- label="🎡 Input Audio (Content to Transform)",
120
- type="filepath",
121
- sources=["upload", "microphone"]
122
- )
123
- language = gr.Dropdown(
124
- choices=[
125
- ("English", "en"),
126
- ("Spanish", "es"),
127
- ("French", "fr"),
128
- ("German", "de"),
129
- ("Italian", "it"),
130
- ("Portuguese", "pt"),
131
- ("Polish", "pl"),
132
- ("Turkish", "tr"),
133
- ("Russian", "ru"),
134
- ("Dutch", "nl"),
135
- ("Czech", "cs"),
136
- ("Arabic", "ar"),
137
- ("Chinese", "zh"),
138
- ("Japanese", "ja"),
139
- ("Korean", "ko"),
140
- ("Hindi", "hi"),
141
- ("Ukrainian", "uk"),
142
- ("Vietnamese", "vi"),
143
- ("Romanian", "ro"),
144
- ("Greek", "el"),
145
- ("Hebrew", "he"),
146
- ("Finnish", "fi"),
147
- ("Hungarian", "hu"),
148
- ("Swedish", "sv"),
149
- ("Catalan", "ca"),
150
- ("Indonesian", "id"),
151
- ("Malay", "ms"),
152
- ("Bulgarian", "bg"),
153
- ("Slovak", "sk"),
154
- ("Danish", "da"),
155
- ("Norwegian", "no"),
156
- ("Lithuanian", "lt"),
157
- ("Croatian", "hr"),
158
- ("Serbian", "sr"),
159
- ("Slovenian", "sl"),
160
- ("Estonian", "et"),
161
- ("Latvian", "lv"),
162
- ("Filipino", "fil"),
163
- ("Bengali", "bn"),
164
- ("Tamil", "ta"),
165
- ("Telugu", "te"),
166
- ("Urdu", "ur"),
167
- ("Farsi", "fa"),
168
- ("Thai", "th"),
169
- ],
170
- value="en",
171
- label="Language"
172
- )
173
- clone_btn = gr.Button("Clone Voice", variant="primary", size="lg")
174
- with gr.Column():
175
- output_audio = gr.Audio(label="Cloned Voice Result")
176
- status_output = gr.Textbox(label="Status", lines=12, interactive=False)
177
-
178
- clone_btn.click(
179
- fn=voice_to_voice_clone,
180
- inputs=[reference_audio, input_audio, language],
181
- outputs=[output_audio, status_output],
182
- show_progress=True
183
- )
184
 
185
  if __name__ == "__main__":
186
- demo.launch()
 
 
 
 
 
 
 
 
8
 
9
  warnings.filterwarnings("ignore")
10
  os.environ["COQUI_TOS_AGREED"] = "1"
 
11
  print("🚀 Starting Voice Cloning Studio...")
12
 
13
  @contextmanager
 
62
  return False
63
 
64
  def voice_to_voice_clone(reference_audio, input_audio, language="en"):
65
+ """
66
+ Main voice cloning function - this will be called by both UI and API
67
+ """
68
  try:
69
+ print(f"🎭 Voice cloning request: {language}")
70
+ print(f"πŸ“ Reference: {reference_audio}")
71
+ print(f"πŸ“ Input: {input_audio}")
72
+
73
  if not reference_audio or not input_audio:
74
  return None, "❌ Please upload both reference and input audio files!"
75
+
76
+ # Load XTTS model
77
  if not load_xtts_manual():
78
  return None, f"❌ XTTS loading failed!\nStatus: {MODEL_STATUS}"
79
+
80
+ # Load Whisper for transcription
81
  load_whisper()
82
+
83
+ # Extract text from input audio
84
  extracted_text = "Voice cloning demonstration."
85
  if WHISPER_MODEL:
86
  try:
 
91
  print(f"βœ… Extracted: '{extracted_text[:100]}...'")
92
  except Exception as e:
93
  print(f"⚠️ Whisper error: {e}")
94
+
95
+ # Generate cloned voice
96
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
97
  output_path = tmp_file.name
98
+
99
+ print(f"πŸ”„ Generating voice clone...")
100
  with patch_torch_load():
101
  TTS_MODEL.tts_to_file(
102
  text=extracted_text,
 
104
  language=language,
105
  file_path=output_path
106
  )
107
+
108
+ # Verify output
109
  if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
110
+ success_message = f"""✅ VOICE-TO-VOICE CLONING SUCCESS!
 
111
  πŸ“ Content: '{extracted_text[:150]}...'
112
  🎭 Device: {DEVICE}
113
  πŸ”§ Status: {MODEL_STATUS}
114
+ πŸ“Š Output size: {os.path.getsize(output_path)} bytes
115
  """
116
+ print("✅ Voice cloning completed successfully!")
117
+ return output_path, success_message
118
  else:
119
  return None, "❌ Generated audio file is empty!"
120
+
121
  except Exception as e:
122
+ error_msg = f"❌ Voice cloning error: {str(e)}\nModel: {MODEL_STATUS}"
123
+ print(error_msg)
124
+ return None, error_msg
125
 
126
+ # FIXED: Use gr.Interface instead of gr.Blocks for proper API exposure
127
+ interface = gr.Interface(
128
+ fn=voice_to_voice_clone,
129
+ inputs=[
130
+ gr.Audio(
131
+ label="🎀 Reference Audio (Voice to Clone)",
132
+ type="filepath",
133
+ sources=["upload"]
134
+ ),
135
+ gr.Audio(
136
+ label="🎡 Input Audio (Content to Transform)",
137
+ type="filepath",
138
+ sources=["upload"]
139
+ ),
140
+ gr.Dropdown(
141
+ choices=[
142
+ "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl",
143
+ "cs", "ar", "zh", "ja", "ko", "hi", "uk", "vi", "ro", "el",
144
+ "he", "fi", "hu", "sv", "ca", "id", "ms", "bg", "sk", "da",
145
+ "no", "lt", "hr", "sr", "sl", "et", "lv", "fil", "bn", "ta",
146
+ "te", "ur", "fa", "th"
147
+ ],
148
+ value="en",
149
+ label="🌍 Language"
150
+ )
151
+ ],
152
+ outputs=[
153
+ gr.Audio(label="πŸŽ‰ Cloned Voice Result"),
154
+ gr.Textbox(label="πŸ“‹ Status", lines=8)
155
+ ],
156
+ title="🎭 REAL Voice Cloning Studio",
157
+ description="Transform any voice into any other voice using XTTS-v2 and Whisper AI models. Upload reference audio and input audio to get started.",
158
+ theme=gr.themes.Soft(),
159
+ allow_flagging="never",
160
+ api_name="voice_to_voice_clone" # CRITICAL: This creates the API endpoint
161
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  if __name__ == "__main__":
164
+ print("🌐 Launching Voice Cloning Studio...")
165
+ interface.launch(
166
+ server_name="0.0.0.0",
167
+ server_port=7860,
168
+ share=False,
169
+ show_api=True, # Shows API documentation
170
+ debug=True
171
+ )