"""Gradio app for a subjective listening test (naturalness / expressiveness).

Each participant rates a shuffled sequence of clips: a random sample of
utterances, each rendered by every system in ``SYSTEM_DIRS``.  The ratings are
accumulated into a result string that is shown to the participant at the end.
"""

# NOTE: os, hashlib, numpy and huggingface_hub are not referenced below; they
# are kept because the original file imported them.
import hashlib
import os
import random
import string

import gradio as gr
import huggingface_hub
import numpy as np
import torchaudio as ta

# Directory inserted in front of the file name for each evaluated system.
# The position in this tuple is the system id written to the result string.
SYSTEM_DIRS = ("vits270", "vits274", "vits275", "vits276", "GT")

# Number of distinct utterances sampled per participant (clamped to the size
# of the file list).  Trials per session = utterances * len(SYSTEM_DIRS).
NUM_UTTERANCES = 10

# File with one "path|..." record per line; only the first field is used.
FILE_LIST_PATH = "./text_expressive.txt"

INTRO_MARKDOWN = """
## 主観的音声評価
これから右に音声が表示されますので,その音声の品質を [1:とても悪い, 2:悪い, 3:普通, 4:良い, 5:とても良い]で評価してください.
"""


def _current_trial(state: dict) -> tuple:
    """Return ``(utterance index, system id)`` for the trial at ``state["count"]``.

    Each entry of ``state["order_list"]`` encodes a trial as
    ``utterance_slot * len(SYSTEM_DIRS) + system_id``.
    """
    slot = state["order_list"][state["count"]]
    utterance_slot, system_id = divmod(slot, len(SYSTEM_DIRS))
    return state["idx_list"][utterance_slot], system_id


def _load_trial_audio(state: dict) -> tuple:
    """Load the clip for the current trial as ``(sample_rate, samples)``.

    The listed path ``a/b/x.wav`` is rewritten to ``a/b/<system dir>/x.wav``.
    The sample rate reported by torchaudio is returned instead of a fixed
    22050 Hz so that clips stored at another rate play back correctly.
    """
    utterance_idx, system_id = _current_trial(state)
    parts = state["filepath"][utterance_idx][0].strip().split("/")
    parts[-1] = SYSTEM_DIRS[system_id] + "/" + parts[-1]
    waveform, sample_rate = ta.load("/".join(parts))
    return sample_rate, waveform.cpu().numpy().squeeze()


def login(username, state):
    """Start a session: tag the user, draw the trials and show the first clip.

    Args:
        username: Name typed by the participant.  A random 5-letter suffix is
            appended so that identical names remain distinguishable.
        state: Per-session dict (see the ``gr.State`` initial value below);
            it is updated in place and also returned.

    Returns:
        Values for ``[message, audioA, state, tb_username, button_login,
        evaluation_q, evaluation_e, button]``: the progress message, the first
        clip, the state, then visibility updates that hide the login widgets
        and reveal the rating widgets.

    Raises:
        ValueError: If the file list contains no utterances.
    """
    suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(5))
    state["username"] = username + "_" + suffix

    num_available = len(state["filepath"])
    if num_available == 0:
        raise ValueError("the utterance list is empty; nothing to evaluate")
    # Clamp so that a short file list still yields a valid (shorter) session.
    num_utterances = min(NUM_UTTERANCES, num_available)

    state["idx_list"] = random.sample(range(num_available), k=num_utterances)
    state["order_list"] = list(range(num_utterances * len(SYSTEM_DIRS)))
    random.shuffle(state["order_list"])

    return (
        f"{state['count']+1}回目の評価です.",
        _load_trial_audio(state),
        state,
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
    )


def func(evaluation_q, evaluation_e, state):
    """Record the ratings for the current clip and advance to the next one.

    Args:
        evaluation_q: Naturalness rating from the first slider.
        evaluation_e: Expressiveness rating from the second slider.
        state: Per-session dict; updated in place and also returned.

    Returns:
        Values for ``[message, audioA, state, evaluation_q, evaluation_e,
        button]``.  While trials remain: the progress message, the next clip
        and the widgets kept visible.  After the last trial: the closing
        message containing the result string and username, with the audio
        player and rating widgets hidden.
    """
    _, system_id = _current_trial(state)
    state["result"] = state["result"] + "{}|{}|{}/".format(
        system_id, evaluation_q, evaluation_e
    )
    state["count"] += 1

    # The session length follows from the trial list built in login().
    if state["count"] >= len(state["order_list"]):
        return (
            "以上で音声評価は終了になります.以下の数字と文字のパスワードをランサーズに入力してください.パスワード: {}, {}".format(
                state["result"], state["username"]
            ),
            gr.update(visible=False),
            state,
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )

    return (
        f"{state['count']+1}回目の評価です.",
        _load_trial_audio(state),
        state,
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
    )


with gr.Blocks() as demo:
    gr.Markdown(INTRO_MARKDOWN)

    with open(FILE_LIST_PATH, encoding='utf-8') as f:
        # Skip blank lines; they would otherwise become unusable empty paths.
        filepath = [line.strip().split("|") for line in f if line.strip()]

    state = gr.State({
        "username": None,
        "count": 0,
        "filepath": filepath,
        "idx_list": None,
        "order_list": None,
        "result": "",
    })

    # NOTE(review): the nesting of the widgets below is reconstructed from a
    # whitespace-collapsed source (login on the left, player and ratings on
    # the right) -- confirm against the intended layout.
    with gr.Row():
        with gr.Column():
            tb_username = gr.Textbox(label="あなたの名前を入力してください")
            button_login = gr.Button(value="音声評価を始める")
        with gr.Column():
            message = gr.Textbox(max_lines=50)
            audioA = gr.Audio(label="評価対象の音声")
            evaluation_q = gr.Slider(
                1, 5, value=4, step=1,
                label="音声の自然性について:(1:とても悪い, 2:悪い, 3:普通, 4:良い, 5:とても良い)",
                visible=False,
            )
            evaluation_e = gr.Slider(
                1, 5, value=4, step=1,
                label="音声の表現力について:(1:とても悪い, 2:悪い, 3:普通, 4:良い, 5:とても良い)",
                visible=False,
            )
            button = gr.Button(value="評価を送信する", visible=False)

    button_login.click(
        login,
        [tb_username, state],
        [message, audioA, state, tb_username, button_login,
         evaluation_q, evaluation_e, button],
    )
    button.click(
        func,
        [evaluation_q, evaluation_e, state],
        [message, audioA, state, evaluation_q, evaluation_e, button],
    )

demo.launch()