Spaces:

ajnx014
/

wave-app

Sleeping

App Files Community

ajnx014 committed on Mar 22

Commit

b430002

verified ·

1 Parent(s): 021277b

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -15

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ def load_audio(file, target_sr=16000):
     return audio
 def extract_embeddings(encoder, audio_files):
-    """Extracts voice embeddings from uploaded or recorded audio files."""
     embeddings = []
     for file_path in audio_files:
         audio = load_audio(file_path)  # Load and preprocess the audio file
@@ -43,10 +43,25 @@ def test_voice(file):
         if reference_embeddings is None or len(reference_embeddings) == 0:
             return "No reference voice samples found. Please upload training samples first."
-        test_audio = load_audio(file.name)
         test_embedding = encoder.embed_utterance(test_audio)
         similarity_score = compute_similarity(test_embedding, reference_embeddings)
         result = f"Similarity Score: {similarity_score:.2f}\n"
         if similarity_score > 0.8:
             result += "The voice matches closely with the training samples!\n"
@@ -55,35 +70,26 @@ def test_voice(file):
         else:
             result += "The voice does not match the training samples."
         return result
     except Exception as e:
         return f"Error: {str(e)}"
-def record_and_process(audio):
-    """Processes recorded audio for training or testing."""
-    file_path = "temp_recorded.wav"
-    sf.write(file_path, audio, 16000)
-    return file_path
 with gr.Blocks() as app:
     gr.Markdown("## Voice Recognition with Similarity Testing")
-    gr.Markdown("**Instruction:** Upload or record a single file of more than 1-minute duration or multiple files totaling more than 1 minute.")
     gr.Markdown("[🔗 Link to Eleven Labs](https://elevenlabs.io/app/speech-synthesis/text-to-speech)")
     gr.Markdown("**Access Eleven Labs to test the model on multiple voices**")
     with gr.Row():
         train_audio = gr.File(label="Upload up to 50 training voice samples", file_types=[".wav"], file_count="multiple")
-        record_train = gr.Audio(sources=["microphone"], type="numpy", label="Record training voice")
         train_button = gr.Button("Train Model")
     train_output = gr.Textbox()
     train_button.click(train_voice_samples, inputs=train_audio, outputs=train_output)
-    record_train.change(record_and_process, inputs=record_train, outputs=train_audio)
     with gr.Row():
         test_audio = gr.File(label="Upload a test voice file", file_types=[".wav"])
-        record_test = gr.Audio(sources=["microphone"], type="numpy", label="Record test voice")
         test_button = gr.Button("Test Voice")
     test_output = gr.Textbox()
     test_button.click(test_voice, inputs=test_audio, outputs=test_output)
-    record_test.change(record_and_process, inputs=record_test, outputs=test_audio)
-app.launch(share=True)

     return audio
 def extract_embeddings(encoder, audio_files):
+    """Extracts voice embeddings from uploaded audio files."""
     embeddings = []
     for file_path in audio_files:
         audio = load_audio(file_path)  # Load and preprocess the audio file
         if reference_embeddings is None or len(reference_embeddings) == 0:
             return "No reference voice samples found. Please upload training samples first."
+        # Debugging: Check if file is received
+        print(f"Received test file: {file.name}")
+        # Load test audio properly
+        test_audio, sr = librosa.load(file.name, sr=16000)
+        # Debugging: Check audio shape
+        print(f"Loaded test audio, shape: {test_audio.shape}, Sample rate: {sr}")
+        # Extract embedding
         test_embedding = encoder.embed_utterance(test_audio)
+        # Compute similarity
         similarity_score = compute_similarity(test_embedding, reference_embeddings)
+        # Debugging: Check similarity score
+        print(f"Computed similarity score: {similarity_score}")
+        # Generate result message
         result = f"Similarity Score: {similarity_score:.2f}\n"
         if similarity_score > 0.8:
             result += "The voice matches closely with the training samples!\n"
         else:
             result += "The voice does not match the training samples."
         return result
     except Exception as e:
         return f"Error: {str(e)}"
 with gr.Blocks() as app:
     gr.Markdown("## Voice Recognition with Similarity Testing")
+    gr.Markdown("**Instruction:** Upload a single file of more than 1-minute duration or multiple files totaling more than 1 minute.")
     gr.Markdown("[🔗 Link to Eleven Labs](https://elevenlabs.io/app/speech-synthesis/text-to-speech)")
     gr.Markdown("**Access Eleven Labs to test the model on multiple voices**")
     with gr.Row():
         train_audio = gr.File(label="Upload up to 50 training voice samples", file_types=[".wav"], file_count="multiple")
         train_button = gr.Button("Train Model")
     train_output = gr.Textbox()
     train_button.click(train_voice_samples, inputs=train_audio, outputs=train_output)
     with gr.Row():
         test_audio = gr.File(label="Upload a test voice file", file_types=[".wav"])
         test_button = gr.Button("Test Voice")
     test_output = gr.Textbox()
     test_button.click(test_voice, inputs=test_audio, outputs=test_output)
+app.launch(share=True)