ajnx014 committed on
Commit
b430002
·
verified ·
1 Parent(s): 021277b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -15
app.py CHANGED
@@ -15,7 +15,7 @@ def load_audio(file, target_sr=16000):
15
  return audio
16
 
17
  def extract_embeddings(encoder, audio_files):
18
- """Extracts voice embeddings from uploaded or recorded audio files."""
19
  embeddings = []
20
  for file_path in audio_files:
21
  audio = load_audio(file_path) # Load and preprocess the audio file
@@ -43,10 +43,25 @@ def test_voice(file):
43
  if reference_embeddings is None or len(reference_embeddings) == 0:
44
  return "No reference voice samples found. Please upload training samples first."
45
 
46
- test_audio = load_audio(file.name)
 
 
 
 
 
 
 
 
 
47
  test_embedding = encoder.embed_utterance(test_audio)
 
 
48
  similarity_score = compute_similarity(test_embedding, reference_embeddings)
49
 
 
 
 
 
50
  result = f"Similarity Score: {similarity_score:.2f}\n"
51
  if similarity_score > 0.8:
52
  result += "The voice matches closely with the training samples!\n"
@@ -55,35 +70,26 @@ def test_voice(file):
55
  else:
56
  result += "The voice does not match the training samples."
57
  return result
 
58
  except Exception as e:
59
  return f"Error: {str(e)}"
60
 
61
- def record_and_process(audio):
62
- """Processes recorded audio for training or testing."""
63
- file_path = "temp_recorded.wav"
64
- sf.write(file_path, audio, 16000)
65
- return file_path
66
-
67
  with gr.Blocks() as app:
68
  gr.Markdown("## Voice Recognition with Similarity Testing")
69
- gr.Markdown("**Instruction:** Upload or record a single file of more than 1-minute duration or multiple files totaling more than 1 minute.")
70
  gr.Markdown("[🔗 Link to Eleven Labs](https://elevenlabs.io/app/speech-synthesis/text-to-speech)")
71
  gr.Markdown("**Access Eleven Labs to test the model on multiple voices**")
72
 
73
  with gr.Row():
74
  train_audio = gr.File(label="Upload up to 50 training voice samples", file_types=[".wav"], file_count="multiple")
75
- record_train = gr.Audio(sources=["microphone"], type="numpy", label="Record training voice")
76
  train_button = gr.Button("Train Model")
77
  train_output = gr.Textbox()
78
  train_button.click(train_voice_samples, inputs=train_audio, outputs=train_output)
79
- record_train.change(record_and_process, inputs=record_train, outputs=train_audio)
80
-
81
  with gr.Row():
82
  test_audio = gr.File(label="Upload a test voice file", file_types=[".wav"])
83
- record_test = gr.Audio(sources=["microphone"], type="numpy", label="Record test voice")
84
  test_button = gr.Button("Test Voice")
85
  test_output = gr.Textbox()
86
  test_button.click(test_voice, inputs=test_audio, outputs=test_output)
87
- record_test.change(record_and_process, inputs=record_test, outputs=test_audio)
88
 
89
- app.launch(share=True)
 
15
  return audio
16
 
17
  def extract_embeddings(encoder, audio_files):
18
+ """Extracts voice embeddings from uploaded audio files."""
19
  embeddings = []
20
  for file_path in audio_files:
21
  audio = load_audio(file_path) # Load and preprocess the audio file
 
43
  if reference_embeddings is None or len(reference_embeddings) == 0:
44
  return "No reference voice samples found. Please upload training samples first."
45
 
46
+ # Debugging: Check if file is received
47
+ print(f"Received test file: {file.name}")
48
+
49
+ # Load test audio properly
50
+ test_audio, sr = librosa.load(file.name, sr=16000)
51
+
52
+ # Debugging: Check audio shape
53
+ print(f"Loaded test audio, shape: {test_audio.shape}, Sample rate: {sr}")
54
+
55
+ # Extract embedding
56
  test_embedding = encoder.embed_utterance(test_audio)
57
+
58
+ # Compute similarity
59
  similarity_score = compute_similarity(test_embedding, reference_embeddings)
60
 
61
+ # Debugging: Check similarity score
62
+ print(f"Computed similarity score: {similarity_score}")
63
+
64
+ # Generate result message
65
  result = f"Similarity Score: {similarity_score:.2f}\n"
66
  if similarity_score > 0.8:
67
  result += "The voice matches closely with the training samples!\n"
 
70
  else:
71
  result += "The voice does not match the training samples."
72
  return result
73
+
74
  except Exception as e:
75
  return f"Error: {str(e)}"
76
 
 
 
 
 
 
 
77
  with gr.Blocks() as app:
78
  gr.Markdown("## Voice Recognition with Similarity Testing")
79
+ gr.Markdown("**Instruction:** Upload a single file of more than 1-minute duration or multiple files totaling more than 1 minute.")
80
  gr.Markdown("[🔗 Link to Eleven Labs](https://elevenlabs.io/app/speech-synthesis/text-to-speech)")
81
  gr.Markdown("**Access Eleven Labs to test the model on multiple voices**")
82
 
83
  with gr.Row():
84
  train_audio = gr.File(label="Upload up to 50 training voice samples", file_types=[".wav"], file_count="multiple")
 
85
  train_button = gr.Button("Train Model")
86
  train_output = gr.Textbox()
87
  train_button.click(train_voice_samples, inputs=train_audio, outputs=train_output)
88
+
 
89
  with gr.Row():
90
  test_audio = gr.File(label="Upload a test voice file", file_types=[".wav"])
 
91
  test_button = gr.Button("Test Voice")
92
  test_output = gr.Textbox()
93
  test_button.click(test_voice, inputs=test_audio, outputs=test_output)
 
94
 
95
+ app.launch(share=True)