Spaces:
Sleeping
Sleeping
File size: 766 Bytes
5b1f241 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# Test correct replication of speaker phonemes (demo: zero-shot adult-vs-child speech classification).
from transformers import pipeline
# Module-level zero-shot audio classifier, built once at import time and
# reused by every classify_audio call.
pipe = pipeline(
    model="laion/clap-htsat-unfused",
    task="zero-shot-audio-classification",
)
import numpy as np
import gradio as gr
def get_labels(target):
    """Return the candidate label prompts for zero-shot classification.

    Args:
        target: Accepted for interface compatibility; currently unused, so
            the same two prompts are returned for every value.

    Returns:
        A new two-element list of label prompt strings.
    """
    # Plain literals: the original f-strings had no placeholders.
    return ["An adult speaking.", "A child speaking."]
def classify_audio(audio, target=None):
    """Score a recorded clip against the candidate speaker labels.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            recorded. ``samples`` is a NumPy array; a 2-D array is assumed to
            be laid out as ``(samples, channels)`` (Gradio's numpy audio
            format -- TODO confirm for the installed Gradio version).
        target: Forwarded unchanged to ``get_labels``.

    Returns:
        Dict mapping each candidate label to its score. Empty when there is
        no audio to classify.
    """
    # Nothing recorded: avoid a TypeError on tuple unpacking.
    if audio is None:
        return {}

    # NOTE(review): the sample rate is not passed on; the pipeline presumably
    # assumes its feature extractor's rate -- consider resampling.
    _sr, y = audio
    y = y.astype(np.float32)

    # np.max raises on an empty array, so bail out before normalising.
    if y.size == 0:
        return {}

    # The pipeline expects single-channel input; average channels to mono.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise, skipping silent clips to avoid dividing by zero
    # (which would fill the waveform with NaNs).
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    candidate_labels = get_labels(target)
    preds = pipe(y, candidate_labels=candidate_labels)
    return {p["label"]: p["score"] for p in preds}
# Microphone-in, label-scores-out web UI wrapped around classify_audio.
demo = gr.Interface(
    fn=classify_audio,
    inputs=[gr.Audio(source="microphone")],
    # gr.Label is the current component; gr.outputs.Label is a deprecated alias.
    outputs=gr.Label(),
)
demo.launch(debug=False)