"""Gradio app for a subjective speech-quality listening test.

Each participant rates a sequence of audio clips on a 1-5 scale.  Every
selected utterance is presented once per system listed in ``SYSTEM_DIRS``.
Ratings are appended to ``./data/<username>.log`` and the ``data`` repository
is pushed to the Hugging Face Hub once the last clip has been rated.
"""
import os
import random

import gradio as gr
import huggingface_hub
import torchaudio as ta
from huggingface_hub import Repository

DATASET_REPO_URL = "https://huggingface.co/datasets/m-kazuki/speechEvaluation_normal"
HF_TOKEN = os.environ.get("HF_TOKEN")

# Sub-directory that holds each system's rendition of an utterance.  A trial
# number ``order`` selects the system with ``order % len(SYSTEM_DIRS)``.
SYSTEM_DIRS = ("vits270", "vits73", "grad_tts88", "GT")

# Number of utterances sampled from the listing file for one test.
NUM_UTTERANCES = 10

repo = Repository(
    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)


def _current_trial(state):
    """Return ``(file_idx, which)`` for the trial at ``state["count"]``.

    ``file_idx`` indexes ``state["filepath"]`` (it is looked up through the
    sampled ``state["idx_list"]``); ``which`` indexes ``SYSTEM_DIRS``.
    """
    order = state["order_list"][state["count"]]
    slot, which = divmod(order, len(SYSTEM_DIRS))
    return state["idx_list"][slot], which


def _stimulus_path(listed_path, which):
    """Insert the system directory in front of the file name of ``listed_path``."""
    head, sep, tail = listed_path.strip().rpartition("/")
    return f"{head}{sep}{SYSTEM_DIRS[which]}/{tail}"


def _load_stimulus(state):
    """Load the clip for the current trial as ``(sample_rate, samples)``."""
    file_idx, which = _current_trial(state)
    path = _stimulus_path(state["filepath"][file_idx][0], which)
    waveform, sample_rate = ta.load(path)
    # Use the rate stored in the file rather than assuming a fixed one.
    return sample_rate, waveform.detach().cpu().numpy().squeeze()


def _safe_log_name(username):
    """Turn a user-supplied name into a file name that stays inside ``data``."""
    name = "" if username is None else str(username).strip()
    # The name comes straight from a text box: neutralise path separators and
    # NUL so it cannot point outside the data directory.
    for bad in ("/", "\\", "\0"):
        name = name.replace(bad, "_")
    # Leading dots would allow "." / ".." or produce hidden files.
    name = name.lstrip(".")
    return name or "anonymous"


def login(username, state):
    """Start the test for ``username`` and present the first clip.

    Args:
        username: Name typed by the participant.
        state: Session dict with keys ``username``, ``count``, ``filepath``,
            ``idx_list`` and ``order_list``.

    Returns:
        A 7-tuple matching the outputs wired to the login button: progress
        message, ``(sample_rate, samples)`` audio, the updated state, updates
        hiding the name box and login button, and updates showing the rating
        slider and submit button.
    """
    state["username"] = username
    return (
        f"{state['count']+1}回目の評価です.",
        _load_stimulus(state),
        state,
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(visible=True),
    )


def func(evaluation, state):
    """Record the rating for the current clip and advance to the next one.

    Appends one ``which|file_idx|evaluation`` line to the participant's log,
    where ``file_idx`` is the index of the rated utterance in
    ``state["filepath"]``.  After the last trial the data repository is pushed
    and the completion message is shown.

    Args:
        evaluation: Value of the rating slider.
        state: Session dict (see ``login``).

    Returns:
        A 5-tuple matching the outputs wired to the submit button: message,
        audio (or an update hiding the player), the updated state, and
        visibility updates for the slider and the submit button.
    """
    file_idx, which = _current_trial(state)
    log_path = f"./data/{_safe_log_name(state['username'])}.log"
    with open(log_path, "a", encoding="utf-8") as log:
        log.write(f"{which}|{file_idx}|{evaluation}\n")

    state["count"] += 1
    # Finish when every scheduled trial has been rated.
    if state["count"] >= len(state["order_list"]):
        repo.push_to_hub()
        return (
            "以上で音声評価は終了になります.下記のパスワードをランサーズの備考欄に記入することで作業の完了を確認します.パスワード: fhaofkeofu",
            gr.update(visible=False),
            state,
            gr.update(visible=False),
            gr.update(visible=False),
        )

    return (
        f"{state['count']+1}回目の評価です.",
        _load_stimulus(state),
        state,
        gr.update(visible=True),
        gr.update(visible=True),
    )


with gr.Blocks() as demo:
    gr.Markdown(
        """
        ## 主観的音声評価
        これから右に音声が表示されますので,その音声の品質を [1:とても悪い, 2:悪い, 3:普通, 4:良い, 5:とても良い]で評価してください.
        """
    )

    # Each listing line is "|"-separated; only the first field (the audio
    # path) is used here.  Blank lines are skipped.
    with open("./text_normal.txt", encoding="utf-8") as f:
        filepath = [line.strip().split("|") for line in f if line.strip()]

    # NOTE: the sample and the presentation order are drawn once, when this
    # module is executed, and become the initial value of the session state.
    idx_list = random.sample(range(len(filepath)), k=NUM_UTTERANCES)
    order_list = list(range(NUM_UTTERANCES * len(SYSTEM_DIRS)))
    random.shuffle(order_list)

    state = gr.State({
        "username": None,
        "count": 0,
        "filepath": filepath,
        "idx_list": idx_list,
        "order_list": order_list,
    })

    with gr.Row():
        with gr.Column():
            tb_username = gr.Textbox(label="あなたの名前を入力してください")
            button_login = gr.Button(value="音声評価を始める")
        with gr.Column():
            message = gr.Markdown()
            audioA = gr.Audio(label="評価対象の音声")
            evaluation = gr.Slider(
                1,
                5,
                value=3,
                step=1,
                label="1:とても悪い, 2:悪い, 3:普通, 4:良い, 5:とても良い",
                visible=False,
            )
            button = gr.Button(value="評価を送信する", visible=False)

    button_login.click(
        login,
        [tb_username, state],
        [message, audioA, state, tb_username, button_login, evaluation, button],
    )
    button.click(
        func,
        [evaluation, state],
        [message, audioA, state, evaluation, button],
    )

demo.launch()