File size: 3,047 Bytes
c3e6538
 
 
 
bb86e76
c3e6538
 
 
 
 
 
761cc63
e51a96f
 
 
 
 
2db1d26
 
c3e6538
 
761cc63
 
 
 
 
5c9e28a
761cc63
c3e6538
 
 
 
761cc63
c3e6538
4817b42
 
b66c175
761cc63
6e44725
 
 
31adb1a
6e44725
 
b66c175
c3e6538
 
6e44725
c3e6538
6e44725
 
c3e6538
 
 
 
6e44725
761cc63
6e44725
 
 
 
 
 
8ea3af8
6e44725
a6f8f50
6e44725
 
 
 
 
 
 
 
 
 
 
 
 
 
c3e6538
6e44725
c3e6538
4817b42
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import torch
#from transformers.pipelines.audio_utils import ffmpeg_read
from speechscore import SpeechScore 
import gradio as gr
import pprint

# Module-level settings. MODEL_NAME and BATCH_SIZE are not referenced anywhere
# else in the visible part of this file; they are kept for compatibility.
MODEL_NAME = "alibabasglab/speechscore"
BATCH_SIZE = 1

# CUDA device index 0 when torch reports a usable GPU, otherwise the CPU.
device = "cpu" if not torch.cuda.is_available() else 0

def score_file(test_file, ref_file, score_list_nis, score_list_is):
    """Score an uploaded test clip against an uploaded reference clip.

    Args:
        test_file: Value of the "test file" audio input, forwarded to
            SpeechScore as ``test_path``.
        ref_file: Value of the "reference file" audio input, forwarded to
            SpeechScore as ``reference_path``.
        score_list_nis: Selected non-intrusive score names, or None.
        score_list_is: Selected intrusive score names, or None.

    Returns:
        Whatever the SpeechScore call returns for the requested scores.
    """
    # Non-intrusive names first, then intrusive ones; a None group is skipped.
    requested = [
        name
        for group in (score_list_nis, score_list_is)
        if group is not None
        for name in group
    ]
    scorer = SpeechScore(requested)
    return scorer(
        test_path=test_file,
        reference_path=ref_file,
        window=None,
        score_rate=16000,
        return_mean=False,
    )

def score_mic(test_file, score_list_nis):
    """Score a microphone recording using non-intrusive scores only.

    Args:
        test_file: Value of the microphone audio input, forwarded to
            SpeechScore as ``test_path``.
            NOTE(review): that input is declared with ``type='numpy'`` in this
            file -- confirm SpeechScore accepts that form for ``test_path``.
        score_list_nis: Selected non-intrusive score names, or None.

    Returns:
        Whatever the SpeechScore call returns; no reference audio is passed.
    """
    # A None selection yields an empty score list.
    requested = [] if score_list_nis is None else [*score_list_nis]
    scorer = SpeechScore(requested)
    return scorer(
        test_path=test_file,
        reference_path=None,
        window=None,
        score_rate=16000,
        return_mean=False,
    )

# Top-level Blocks container; the tabbed UI is attached to it and launched at
# the bottom of this file.
demo = gr.Blocks()

# "Score Speech Quality" tab: two uploaded clips plus two multiselect score
# pickers, wired to score_file; the result is rendered as text.
file_score = gr.Interface(
    title="Score speech quality for an audio clip",
    description=(
        "Score speech quality with the click of a button! Demo includes the"
        " commonly used speech quality assessments for the audio file"
        " of arbitrary length."
    ),
    fn=score_file,
    # Order must match score_file's positional parameters.
    inputs=[
        gr.Audio(label="test file", sources=["upload"], type="filepath"),
        gr.Audio(label="reference file", sources=["upload"], type="filepath"),
        gr.Dropdown(
            choices=["DNSMOS", "SRMR"],
            value=["DNSMOS"],
            multiselect=True,
            label="Non-Intrusive Scores",
            info="Choose scores to include, no reference audio is required.",
        ),
        gr.Dropdown(
            choices=[
                "PESQ",
                "NB_PESQ",
                "STOI",
                "SISDR",
                "FWSEGSNR",
                "LSD",
                "BSSEval",
                "SNR",
                "SSNR",
                "LLR",
                "CSIG",
                "CBAK",
                "COVL",
                "MCD",
            ],
            value=["PESQ", "STOI"],
            multiselect=True,
            label="Intrusive Scores",
            info="Choose scores to include, reference audio is required.",
        ),
    ],
    outputs="text",
)

# "Score Microphone Quality" tab: a microphone recording plus a multiselect
# picker of non-intrusive scores, wired to score_mic; result rendered as text.
mic_score = gr.Interface(
    title="Test microphone quality using speech score",
    description=(
        "Score your microphone quality with the click of a button!"
        " Uses the most popular method to test your microphone quality"
        " with a short speech clip."
    ),
    fn=score_mic,
    # Order must match score_mic's positional parameters.
    inputs=[
        gr.Audio(
            sources=["microphone"],
            # NOTE(review): score_mic forwards this value as test_path, while
            # the upload tab uses type="filepath" -- confirm 'numpy' is intended.
            type='numpy',
            waveform_options=gr.WaveformOptions(
                waveform_color="#01C6FF",
                waveform_progress_color="#0066B4",
                skip_length=2,
                show_controls=False,
            ),
        ),
        gr.Dropdown(
            choices=["DNSMOS", "SRMR"],
            value=["DNSMOS", "SRMR"],
            multiselect=True,
            label="Non-Intrusive Scores",
            info="Choose scores to include, no reference audio is required.",
        ),
    ],
    outputs="text",
)

# Mount both interfaces as tabs inside the Blocks container (microphone tab
# first), then start the app.
with demo:
    gr.TabbedInterface(
        interface_list=[mic_score, file_score],
        tab_names=["Score Microphone Quality", "Score Speech Quality"],
    )

demo.launch()