|
import gradio as gr |
|
import torch |
|
import torchaudio |
|
import json |
|
from work70 import SpeakerIdentifier, AudioPreprocessor |
|
|
|
|
|
# --- One-time setup: configs, model weights, feature extractor ---------------

# Config files are JSON; read them as UTF-8 explicitly so parsing does not
# depend on the platform's locale default encoding (fix: was unspecified).
with open('preprocessor_config.json', encoding='utf-8') as f:
    preprocessor_config = json.load(f)

with open('config.json', encoding='utf-8') as f:
    model_config = json.load(f)

# Build the classifier, load trained weights onto CPU, and switch to
# inference mode (disables dropout/batch-norm updates).
# NOTE(review): torch.load unpickles arbitrary objects; if the checkpoint
# could be untrusted, pass weights_only=True (torch >= 1.13) — confirm.
model = SpeakerIdentifier(num_speakers=2)
model.load_state_dict(torch.load('pytorch_model.bin', map_location=torch.device('cpu')))
model.eval()

# Feature extractor configured with the training-time sample rate.
preprocessor = AudioPreprocessor(sample_rate=preprocessor_config["sample_rate"])

# Index -> display-name mapping used when reporting a prediction.
speaker_names = model_config["speaker_names"]
|
|
|
def recognize_speaker(audio):
    """Identify which known speaker produced a recorded audio clip.

    Args:
        audio: Filesystem path to the recorded clip (Gradio supplies this
            because the input component uses ``type="filepath"``), or
            ``None`` when nothing was recorded.

    Returns:
        A human-readable result string (French) with the model's confidence.
    """
    # Guard clause: nothing was recorded.
    if audio is None:
        return "Aucun audio enregistré."

    signal, source_rate = torchaudio.load(audio)

    # Bring the recording to the sample rate the preprocessor expects.
    target_rate = preprocessor.sample_rate
    if source_rate != target_rate:
        signal = torchaudio.transforms.Resample(source_rate, target_rate)(signal)

    # Extract features and prepend the batch dimension the model expects.
    features = preprocessor(signal).unsqueeze(0)

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        logits = model(features)

    predicted = torch.argmax(logits, dim=1).item()
    confidence = torch.softmax(logits, dim=1).max().item()

    # Below the 70% threshold we decline to name a speaker.
    if confidence > 0.7:
        return f"✅ Locuteur reconnu : {speaker_names[predicted]} (Confiance {confidence*100:.1f}%)"
    else:
        return f"❓ Locuteur inconnu (Confiance {confidence*100:.1f}%)"
|
|
|
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("# 🎤 Reconnaissance de Locuteur")
    gr.Markdown("Parle dans ton micro, et je te reconnais !")

    with gr.Row():
        # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4+
        # renamed it to `sources=[...]` — confirm the installed version.
        mic_input = gr.Audio(source="microphone", type="filepath", label="Enregistre ta voix")
        result_display = gr.Label(label="Résultat")

    trigger = gr.Button("Reconnaître")
    # Wire the button to the recognizer: file path in, result string out.
    trigger.click(fn=recognize_speaker, inputs=mic_input, outputs=result_display)

app.launch()
|
|