"""Gradio demo that scores speech quality with SpeechScore.

Two tabs are exposed:

* "Score Microphone Quality" records a clip from the microphone and runs the
  non-intrusive scores (no reference audio involved).
* "Score Speech Quality" takes an uploaded test clip plus an optional
  reference clip and runs non-intrusive and/or intrusive scores.
"""
import pprint

import gradio as gr
import torch

# from transformers.pipelines.audio_utils import ffmpeg_read
from speechscore import SpeechScore

MODEL_NAME = "alibabasglab/speechscore"
BATCH_SIZE = 1

device = 0 if torch.cuda.is_available() else "cpu"

# Arguments forwarded to every SpeechScore call made by this demo.
SCORE_RATE = 16000
ROUND_DIGITS = 2

# Score names offered in the dropdowns. Per the UI text below, the first group
# needs no reference audio and the second group requires one.
NON_INTRUSIVE_SCORES = ("DNSMOS", "SRMR")
INTRUSIVE_SCORES = (
    "PESQ",
    "NB_PESQ",
    "STOI",
    "SISDR",
    "FWSEGSNR",
    "LSD",
    "BSSEval",
    "SNR",
    "SSNR",
    "LLR",
    "CSIG",
    "CBAK",
    "COVL",
    "MCD",
)


def _merge_selections(*selections):
    """Concatenate the dropdown selections into one list, skipping ``None``."""
    merged = []
    for selection in selections:
        if selection is not None:
            merged.extend(selection)
    return merged


def _compute_scores(score_names, test_input, reference_path=None):
    """Build a SpeechScore for ``score_names`` and run it on the given audio."""
    scorer = SpeechScore(score_names)
    return scorer(
        test_path=test_input,
        reference_path=reference_path,
        window=None,
        score_rate=SCORE_RATE,
        return_mean=False,
        round_digits=ROUND_DIGITS,
    )


def score_file(test_file, ref_file, score_list_nis, score_list_is):
    """Score an uploaded test clip, optionally against a reference clip.

    Args:
        test_file: Path of the uploaded test audio, or ``None`` if nothing
            was uploaded.
        ref_file: Path of the uploaded reference audio, or ``None``.
        score_list_nis: Selected non-intrusive score names, or ``None``.
        score_list_is: Selected intrusive score names, or ``None``.

    Returns:
        Whatever the SpeechScore call returns; Gradio renders it in the
        "text" output.

    Raises:
        gr.Error: If no test clip was provided, or if intrusive scores were
            selected without a reference clip.
    """
    if test_file is None:
        raise gr.Error("Please upload a test audio file before scoring.")
    # The intrusive dropdown states that reference audio is required, so fail
    # with a readable message instead of an opaque error from the scorer.
    if score_list_is and ref_file is None:
        raise gr.Error(
            "Intrusive scores require a reference audio file. Upload one or"
            " deselect the intrusive scores."
        )

    score_list = _merge_selections(score_list_nis, score_list_is)
    return _compute_scores(score_list, test_file, ref_file)


def score_mic(test_file, score_list_nis):
    """Score a microphone recording with the selected non-intrusive scores.

    Args:
        test_file: Value delivered by the microphone ``gr.Audio`` input, or
            ``None`` if nothing was recorded.
        score_list_nis: Selected non-intrusive score names, or ``None``.

    Returns:
        Whatever the SpeechScore call returns; Gradio renders it in the
        "text" output.

    Raises:
        gr.Error: If no recording was provided.
    """
    if test_file is None:
        raise gr.Error("Please record a short speech clip before scoring.")

    # NOTE(review): the microphone input below is declared with type='numpy',
    # so this value is not a file path even though it is passed as test_path.
    # Confirm that SpeechScore accepts that form.
    score_list = _merge_selections(score_list_nis)
    return _compute_scores(score_list, test_file, None)


demo = gr.Blocks()

file_score = gr.Interface(
    fn=score_file,
    inputs=[
        gr.Audio(sources=["upload"], label="test file", type="filepath"),
        gr.Audio(sources=["upload"], label="reference file", type="filepath"),
        gr.Dropdown(
            list(NON_INTRUSIVE_SCORES),
            value=["DNSMOS"],
            multiselect=True,
            label="Non-Intrusive Scores",
            info="Choose scores to include, reference audio is not required.",
        ),
        gr.Dropdown(
            list(INTRUSIVE_SCORES),
            value=["PESQ", "STOI"],
            multiselect=True,
            label="Intrusive Scores",
            info="Choose scores to include, reference audio is required.",
        ),
    ],
    outputs="text",
    title="Score speech quality for an audio clip",
    description=(
        "Score speech quality with the click of a button! Demo includes the"
        " commonly used speech quality assessments for the audio file"
        " of arbitrary length."
    ),
)

mic_score = gr.Interface(
    fn=score_mic,
    inputs=[
        gr.Audio(
            sources=["microphone"],
            waveform_options=gr.WaveformOptions(
                waveform_color="#01C6FF",
                waveform_progress_color="#0066B4",
                skip_length=2,
                show_controls=False,
            ),
            type='numpy',
        ),
        gr.Dropdown(
            list(NON_INTRUSIVE_SCORES),
            value=["DNSMOS", "SRMR"],
            multiselect=True,
            label="Non-Intrusive Scores",
            info="Choose scores to include, no reference audio is required.",
        ),
    ],
    outputs="text",
    title="Test microphone quality using speech score",
    description=(
        "Score your microphone quality with the click of a button!"
        " Uses the most popular method to test your microphone quality"
        " with a short speech clip."
    ),
)

with demo:
    gr.TabbedInterface(
        [mic_score, file_score],
        ["Score Microphone Quality", "Score Speech Quality"],
    )

# Launched unconditionally, exactly as the original script did.
demo.launch()