Spaces:
Running
Running
File size: 3,080 Bytes
c3e6538 bb86e76 c3e6538 761cc63 e51a96f 2db1d26 17e3ef6 c3e6538 761cc63 17e3ef6 761cc63 c3e6538 761cc63 c3e6538 4817b42 b66c175 144c7ec 6e44725 31adb1a 6e44725 b66c175 c3e6538 6e44725 c3e6538 6e44725 c3e6538 6e44725 761cc63 6e44725 8ea3af8 6e44725 a6f8f50 6e44725 c3e6538 6e44725 c3e6538 4817b42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import torch
#from transformers.pipelines.audio_utils import ffmpeg_read
from speechscore import SpeechScore
import gradio as gr
import pprint
# Module-level settings. MODEL_NAME and BATCH_SIZE are not referenced anywhere
# in the visible part of this file; they are kept for any code that may read
# them from elsewhere.
MODEL_NAME: str = "alibabasglab/speechscore"
BATCH_SIZE: int = 1

# Device selector: integer index 0 when torch reports CUDA as available,
# otherwise the string "cpu".
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"
def score_file(test_file, ref_file, score_list_nis, score_list_is):
    """Score an uploaded audio file against an optional reference file.

    Args:
        test_file: Value forwarded to SpeechScore as ``test_path``.
        ref_file: Value forwarded to SpeechScore as ``reference_path``.
        score_list_nis: Selected non-intrusive score names, or None.
        score_list_is: Selected intrusive score names, or None.

    Returns:
        Whatever the SpeechScore call returns for the combined selection.
    """
    # Non-intrusive names come first, then intrusive ones; a None selection
    # contributes nothing.
    requested = [
        *(score_list_nis if score_list_nis is not None else ()),
        *(score_list_is if score_list_is is not None else ()),
    ]
    scorer = SpeechScore(requested)
    return scorer(
        test_path=test_file,
        reference_path=ref_file,
        window=None,
        score_rate=16000,
        return_mean=False,
        round_digits=2,
    )
def score_mic(test_file, score_list_nis):
    """Score a microphone recording using non-intrusive metrics only.

    Args:
        test_file: Value forwarded to SpeechScore as ``test_path``.
        score_list_nis: Selected non-intrusive score names, or None.

    Returns:
        Whatever the SpeechScore call returns; no reference is supplied.
    """
    # NOTE(review): the microphone input that feeds this function is declared
    # with type='numpy', so `test_file` is presumably a (sample_rate, data)
    # tuple rather than a path -- confirm SpeechScore accepts that as test_path.
    selected = list(score_list_nis) if score_list_nis is not None else []
    scorer = SpeechScore(selected)
    return scorer(
        test_path=test_file,
        reference_path=None,
        window=None,
        score_rate=16000,
        return_mean=False,
        round_digits=2,
    )
# Root Blocks container; the tabbed layout is attached to it in the
# `with demo:` section at the end of the file.
demo = gr.Blocks()

# Inputs for the upload tab, in the positional order score_file expects:
# test audio, reference audio, non-intrusive selection, intrusive selection.
_file_test_audio = gr.Audio(sources=["upload"], label="test file", type="filepath")
_file_ref_audio = gr.Audio(sources=["upload"], label="reference file", type="filepath")
_file_nis_dropdown = gr.Dropdown(
    ["DNSMOS", "SRMR"],
    value=["DNSMOS"],
    multiselect=True,
    label="Non-Intrusive Scores",
    info="Choose scores to include, reference audio is not required.",
)
_file_is_dropdown = gr.Dropdown(
    [
        "PESQ", "NB_PESQ", "STOI", "SISDR",
        "FWSEGSNR", "LSD", "BSSEval",
        "SNR", "SSNR", "LLR", "CSIG", "CBAK",
        "COVL", "MCD",
    ],
    value=["PESQ", "STOI"],
    multiselect=True,
    label="Intrusive Scores",
    info="Choose scores to include, reference audio is required.",
)

# Upload-based scoring tab: calls score_file and shows its result as text.
file_score = gr.Interface(
    fn=score_file,
    inputs=[
        _file_test_audio,
        _file_ref_audio,
        _file_nis_dropdown,
        _file_is_dropdown,
    ],
    outputs="text",
    title="Score speech quality for an audio clip",
    description=(
        "Score speech quality with the click of a button! Demo includes the"
        " commonly used speech quality assessments for the audio file"
        " of arbitrary length."
    ),
)
# Inputs for the microphone tab, in the positional order score_mic expects:
# the recording, then the non-intrusive score selection.
_mic_waveform_options = gr.WaveformOptions(
    waveform_color="#01C6FF",
    waveform_progress_color="#0066B4",
    skip_length=2,
    show_controls=False,
)
# NOTE(review): type='numpy' hands the callback array data rather than a file
# path, while score_mic forwards it as `test_path` -- confirm this is intended.
_mic_audio = gr.Audio(
    sources=["microphone"],
    waveform_options=_mic_waveform_options,
    type="numpy",
)
_mic_nis_dropdown = gr.Dropdown(
    ["DNSMOS", "SRMR"],
    value=["DNSMOS", "SRMR"],
    multiselect=True,
    label="Non-Intrusive Scores",
    info="Choose scores to include, no reference audio is required.",
)

# Microphone-based scoring tab: calls score_mic and shows its result as text.
mic_score = gr.Interface(
    fn=score_mic,
    inputs=[_mic_audio, _mic_nis_dropdown],
    outputs="text",
    title="Test microphone quality using speech score",
    description=(
        "Score your microphone quality with the click of a button!"
        " Uses the most popular method to test your microphone quality"
        " with a short speech clip."
    ),
)
# Mount both interfaces as tabs inside the root Blocks container. The tab
# titles are matched to the interfaces by position.
with demo:
    gr.TabbedInterface(
        [mic_score, file_score],
        ["Score Microphone Quality", "Score Speech Quality"],
    )

# Start the Gradio server. The trailing "|" that followed this call was a
# stray table separator, not Python, and made the statement a syntax error.
demo.launch()