import os
import time
from datetime import datetime

import gradio as gr
import librosa
import pandas as pd
import torch
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
DESCRIPTION = (
    "Stores a record of previous calls to check whether a client has called before. "
    "The model is pretrained on [superb](https://huggingface.co/datasets/superb) using the "
    "[S3PRL recipe](https://github.com/s3prl/s3prl/tree/master/s3prl/downstream/voxceleb1)."
)

COLUMNS = ["call_id", "date", "client_id", "duration", "new"]
model = Wav2Vec2ForSequenceClassification.from_pretrained("superb/wav2vec2-large-superb-sid")
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("superb/wav2vec2-large-superb-sid")
def file_to_array(path):
    # Load the recording as a 16 kHz mono waveform, matching the model's expected sampling rate.
    speech, _ = librosa.load(path, sr=16000, mono=True)
    duration = librosa.get_duration(y=speech, sr=16000)
    return speech, duration
def handler(audio_path):
    # Create the record file on first run so read_csv does not fail.
    if not os.path.exists("call_records.csv"):
        pd.DataFrame(columns=COLUMNS).to_csv("call_records.csv", index=False)
    calls = pd.read_csv("call_records.csv")

    speech, duration = file_to_array(audio_path)
    # Compute attention masks and normalize the waveform if needed.
    inputs = feature_extractor(speech, sampling_rate=16000, padding=True, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    labels = [model.config.id2label[_id] for _id in predicted_ids.tolist()]
    client_id = labels[0]

    call_id = str(int(time.time()))
    date = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    n_of_calls = len(calls.loc[calls.client_id == client_id])
    new = n_of_calls == 0

    # Append the new call record and persist it.
    record = [call_id, date, client_id, duration, new]
    calls.loc[len(calls)] = record
    calls.to_csv("call_records.csv", index=False)

    if new:
        return f"New client call: Client ID {client_id}"
    return f"Client {client_id} calling again: {n_of_calls} previous calls"
first = gr.Interface(
    fn=handler,
    inputs=gr.Audio(label="Speech Audio", type="filepath"),
    outputs=gr.Text(label="Output", value="..."),
    description=DESCRIPTION,
)

second = gr.Interface(
    fn=handler,
    inputs=gr.Audio(label="Microphone Input", source="microphone", type="filepath"),
    outputs=gr.Text(label="Output", value="..."),
    description=DESCRIPTION,
)
app = gr.TabbedInterface(
    [first, second],
    title="Speaker Call Verification 🎤",
    tab_names=["Audio Upload", "Microphone"],
)

app.launch()
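A minimal local sanity check, run instead of app.launch(): calling the handler twice on the same recording should first report a new client and then a returning one. The file name below is a placeholder, and the speaker ID shown is illustrative.

# Hypothetical local check; "example_call.wav" is a placeholder path to a short recording.
print(handler("example_call.wav"))  # e.g. "New client call: Client ID id10042"
print(handler("example_call.wav"))  # e.g. "Client id10042 calling again: 1 previous calls"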