Spaces:

Wismut
/

StyleTTS2_Studio

Running

App Files Community

Wismut committed on Dec 26, 2024

Commit

7c73742

1 Parent(s): 34ab4db

fixed missing cuda option

Browse files

Files changed (1) hide show

app.py +31 -11

app.py CHANGED Viewed

@@ -33,8 +33,20 @@ ANNOTATED_FEATURES_INFO = [
     "Colloquial | Formal",
 ]
 nltk.download("punkt_tab")
 # Load PCA model and annotated features
 try:
     pca = joblib.load(PCA_MODEL_PATH)
@@ -50,7 +62,9 @@ except FileNotFoundError:
     print(f"Error: Annotated features file '{ANNOTATED_FEATURES_PATH}' not found.")
     annotated_features = None
-# Utility Functions
 def load_voices_json():
@@ -132,8 +146,8 @@ def generate_audio_with_voice(text, voice_key, speed_val):
         print(f"Selected Voice: {voice_key}")
         print(f"Style Vector (First 6): {style_vector[0][:6]}")
-        # Convert to torch tensor
-        style_vec_torch = torch.from_numpy(style_vector).float()
         # Generate audio using the TTS model
         audio_np = tts_with_style_vector(
@@ -148,7 +162,7 @@ def generate_audio_with_voice(text, voice_key, speed_val):
         if audio_np is None:
             print("Audio generation failed.")
-            return None, "Audio generation failed."
         # Prepare audio for Gradio
         sr = 24000  # Adjust based on your actual sampling rate
@@ -216,9 +230,9 @@ def generate_custom_audio(text, voice_key, randomize, speed_str, *slider_values)
             if random_style_vec is None:
                 print("Failed to generate randomized style vector.")
                 return None, None, None
-            # Ensure the style vector is flat
             final_vec = (
-                random_style_vec.numpy().flatten()
                 if isinstance(random_style_vec, torch.Tensor)
                 else np.array(random_style_vec).flatten()
             )
@@ -232,8 +246,10 @@ def generate_custom_audio(text, voice_key, randomize, speed_str, *slider_values)
                 )
                 return None, None, None
-            # Convert to torch tensor
-            style_vec_torch = torch.from_numpy(reconstructed_vec).float().unsqueeze(0)
             # Generate audio with the reconstructed style vector
             audio_np = tts_with_style_vector(
@@ -471,13 +487,17 @@ def create_combined_interface():
             # Save button functionality
             def on_save_style_studio(style_vector, style_name):
                 if not style_name:
-                    return "Please enter a name for the new voice!"
                 result = save_style_to_json(style_vector, style_name)
                 new_choices = list(load_voices_json().keys())
                 # Return multiple values to update both dropdowns and show status
                 return (
-                    gr.Dropdown(choices=new_choices),  # Update first dropdown
-                    gr.Dropdown(choices=new_choices),  # Update studio dropdown
                     result,  # Status message
                 )

     "Colloquial | Formal",
 ]
+# Download necessary NLTK data
 nltk.download("punkt_tab")
+##############################################################################
+# DEVICE CONFIGURATION
+##############################################################################
+# Detect if CUDA is available and set the device accordingly
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
+##############################################################################
+# LOAD PCA MODEL AND ANNOTATED FEATURES
+##############################################################################
 # Load PCA model and annotated features
 try:
     pca = joblib.load(PCA_MODEL_PATH)
     print(f"Error: Annotated features file '{ANNOTATED_FEATURES_PATH}' not found.")
     annotated_features = None
+##############################################################################
+# UTILITY FUNCTIONS
+##############################################################################
 def load_voices_json():
         print(f"Selected Voice: {voice_key}")
         print(f"Style Vector (First 6): {style_vector[0][:6]}")
+        # Convert to torch tensor and move to device
+        style_vec_torch = torch.from_numpy(style_vector).float().to(device)
         # Generate audio using the TTS model
         audio_np = tts_with_style_vector(
         if audio_np is None:
             print("Audio generation failed.")
+            return None, None, "Audio generation failed."
         # Prepare audio for Gradio
         sr = 24000  # Adjust based on your actual sampling rate
             if random_style_vec is None:
                 print("Failed to generate randomized style vector.")
                 return None, None, None
+            # Ensure the style vector is a flat NumPy array (tensors are moved to CPU first)
             final_vec = (
+                random_style_vec.cpu().numpy().flatten()
                 if isinstance(random_style_vec, torch.Tensor)
                 else np.array(random_style_vec).flatten()
             )
                 )
                 return None, None, None
+            # Convert to torch tensor and move to device
+            style_vec_torch = (
+                torch.from_numpy(reconstructed_vec).float().unsqueeze(0).to(device)
+            )
             # Generate audio with the reconstructed style vector
             audio_np = tts_with_style_vector(
             # Save button functionality
             def on_save_style_studio(style_vector, style_name):
+                """Save the style vector under ``style_name`` and refresh the voice dropdowns.
+
+                Every return path yields a 3-tuple in the same order:
+                (first dropdown update, studio dropdown update, status message).
+                """
+                # NOTE(review): gr.Dropdown.update() appears to be unavailable in
+                # Gradio 4+ -- confirm the Gradio version this Space pins.
                 if not style_name:
+                    # Keep the same output order as the success path below: the
+                    # status message is last, and both dropdowns are left unchanged.
+                    return (
+                        gr.Dropdown.update(),
+                        gr.Dropdown.update(),
+                        "Please enter a name for the new voice!",
+                    )
                 result = save_style_to_json(style_vector, style_name)
                 new_choices = list(load_voices_json().keys())
                 # Return multiple values to update both dropdowns and show status
                 return (
+                    gr.Dropdown.update(choices=new_choices),
+                    gr.Dropdown.update(choices=new_choices),
                     result,  # Status message
                 )