Spaces:
Runtime error
Runtime error
| """ | |
| This code is for testing and demonstration. | |
| Adapted from (original source, for credit): https://huggingface.co/spaces/nithinraok/titanet-speaker-verification/blob/main/app.py | |
| """ | |
| import gradio as gr | |
| import torch | |
| from nemo.collections.asr.models import EncDecSpeakerLabelModel | |
# Identifier of the pretrained checkpoint handed to ``from_pretrained``.
model_name = "nvidia/speakerverification_en_titanet_large"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the model once at import time and move it to the selected device.
model = EncDecSpeakerLabelModel.from_pretrained(model_name).to(device)
def compare(path1, path2):
    """Score how similar the speakers in two recordings are.

    An embedding is extracted from each recording with
    ``model.get_embedding``; the cosine similarity of the two embeddings is
    then rescaled from [-1, 1] to [0, 1]. No accept/reject threshold is
    applied here: the caller receives the raw rescaled score.

    Args:
        path1: Location of the first recording, passed straight to
            ``model.get_embedding``.
        path2: Location of the second recording, passed straight to
            ``model.get_embedding``.

    Returns:
        The rescaled similarity formatted as a string with four decimals.

    Raises:
        gr.Error: If either argument is missing or empty.
    """
    if not (path1 and path2):
        raise gr.Error("Need recordings from both speakers!")

    embs1 = model.get_embedding(path1).squeeze()
    embs2 = model.get_embedding(path2).squeeze()

    # Cosine similarity already divides by both vector lengths, so a separate
    # length-normalization pass is redundant. Its eps clamp also keeps an
    # all-zero embedding from turning the score into NaN.
    similarity_score = torch.nn.functional.cosine_similarity(embs1, embs2, dim=0)

    # Map the cosine range [-1, 1] onto [0, 1].
    similarity_score = (similarity_score + 1) / 2

    return f"{similarity_score.item():.4f}"
# Audio widgets for the microphone tab, one per speaker; each hands a file
# path to the callback.
# NOTE(review): `gr.inputs.*` and `optional=` look like legacy Gradio API --
# confirm they are still accepted by the pinned Gradio version.
inputs = [
    gr.inputs.Audio(source="microphone", type="filepath", optional=True, label=speaker_label)
    for speaker_label in ("Speaker #1", "Speaker #2")
]
# Audio widgets for the file-upload tab, one per speaker; each hands a file
# path to the callback.
# NOTE(review): `gr.inputs.*` and `optional=` look like legacy Gradio API --
# confirm they are still accepted by the pinned Gradio version.
upload_inputs = [
    gr.inputs.Audio(source="upload", type="filepath", optional=True, label=speaker_label)
    for speaker_label in ("Speaker #1", "Speaker #2")
]
# Explanatory text passed as ``description`` to the demo interfaces.
description = (
    "The purpose of this demo is to show how VoID could work with speech embeddings rather than mel spectrograms.\n"
    "This demonstration will analyze two recordings of speech and ascertain whether they have been spoken by the same individual.\n"
    "You can attempt this exercise using your own voice."
)

# Heading passed as ``title`` to the demo interfaces.
title = "VoID with TitaNet Embeddings"
# Options for the tab that records both speakers from the microphone.
# NOTE(review): `layout`, `theme="huggingface"` and a boolean `allow_flagging`
# look like legacy Gradio options -- confirm against the pinned Gradio version.
_microphone_tab_options = {
    "fn": compare,
    "inputs": inputs,
    "outputs": "text",
    "title": title,
    "description": description,
    "layout": "horizontal",
    "theme": "huggingface",
    "allow_flagging": False,
    "live": False,
}
microphone_interface = gr.Interface(**_microphone_tab_options)
# Options for the tab that takes both recordings as uploaded files.
# NOTE(review): `layout`, `theme="huggingface"` and a boolean `allow_flagging`
# look like legacy Gradio options -- confirm against the pinned Gradio version.
_upload_tab_options = {
    "fn": compare,
    "inputs": upload_inputs,
    "outputs": "text",
    "title": title,
    "description": description,
    "layout": "horizontal",
    "theme": "huggingface",
    "allow_flagging": False,
    "live": False,
}
upload_interface = gr.Interface(**_upload_tab_options)
# Combine the two interfaces into one tabbed app; the tab names are listed in
# the same order as the interfaces.
_tab_interfaces = [microphone_interface, upload_interface]
_tab_names = ["Microphone", "Upload File"]
demo = gr.TabbedInterface(_tab_interfaces, _tab_names)

# Start the app as soon as the script runs.
demo.launch()