Spaces:
Sleeping
Sleeping
import gradio as gr | |
import os | |
from sidlingvo import wav_to_dvector | |
from huggingface_hub import hf_hub_download | |
# Heading and blurb passed to the Gradio interface.
title = "Speaker Recognition Demo"
description = """
A demo of conformer-based speaker recognition.
Paper: https://arxiv.org/abs/2104.02125
Model: https://huggingface.co/tflite-hub/conformer-speaker-encoder
"""

# Hub repository that hosts the model files, and the local directory they
# are downloaded into.
repo_id = "tflite-hub/conformer-speaker-encoder"
model_path = "models"

# Download every file the runner needs before constructing it. The order
# matches the runner's arguments below: VAD model, VAD mean/stddev, TI-SID
# model.
for _model_file in (
    "vad_long_model.tflite",
    "vad_long_mean_stddev.csv",
    "conformer_tisid_medium.tflite",
):
    hf_hub_download(repo_id=repo_id, filename=_model_file, local_dir=model_path)

# Single shared runner, built once at import time and used by every request.
runner = wav_to_dvector.WavToDvectorRunner(
    vad_model_file=os.path.join(model_path, "vad_long_model.tflite"),
    vad_mean_stddev_file=os.path.join(model_path, "vad_long_mean_stddev.csv"),
    tisid_model_file=os.path.join(model_path, "conformer_tisid_medium.tflite"),
)
def predict(enroll_audio, test_audio):
    """Score how similar the speaker in ``test_audio`` is to ``enroll_audio``.

    Args:
        enroll_audio: File path of the enrollment recording, or ``None`` /
            empty when the user supplied nothing.
        test_audio: File path of the recording to compare against the
            enrollment, or ``None`` / empty when the user supplied nothing.

    Returns:
        A human-readable string: either the similarity score reported by the
        module-level ``runner``, or a prompt asking for the missing input.
    """
    # The UI can be submitted with an empty audio slot; bail out with a clear
    # message instead of handing a missing path to the scoring runner.
    if not enroll_audio or not test_audio:
        return "Please provide both an enrollment recording and a test recording."

    # compute_score takes a list of enrollment files; this demo uses one.
    score = runner.compute_score([enroll_audio], test_audio)
    return f"Speaker similarity score: {score}"
if __name__ == "__main__":
    # Two audio inputs delivered to `predict` as file paths, listed in the
    # order of its parameters: enrollment first, then test.
    enroll_input = gr.Audio(type="filepath")
    test_input = gr.Audio(type="filepath")

    demo = gr.Interface(
        fn=predict,
        inputs=[enroll_input, test_input],
        outputs="text",
        title=title,
        description=description,
    )
    demo.launch()