from transformers import pipeline
import gradio as gr
from pyctcdecode import BeamSearchDecoderCTC  # imported in the original app but not used below
from Aniemore import EmotionFromVoice

# Alternative backend kept from the original: a plain transformers audio-classification pipeline.
# emo = pipeline("audio-classification", model="Aniemore/wav2vec2-xlsr-53-russian-emotion-recognition", trust_remote_code=True)
emo = EmotionFromVoice()


def transcribe(audio):
    # Despite the name, this runs emotion recognition on the recorded audio,
    # not speech-to-text transcription; the audio argument is a file path.
    emotion = emo(audio)
    return emotion


def get_asr_interface():
    # Gradio 3.x-style interface: the microphone recording is saved to a
    # temporary file and passed to transcribe() as a file path.
    return gr.Interface(
        fn=transcribe,
        inputs=[
            gr.inputs.Audio(source="microphone", type="filepath"),
        ],
        outputs=[
            "textbox",
        ],
    )


interfaces = [get_asr_interface()]
names = ["ASR"]

gr.TabbedInterface(interfaces, names).launch(server_name="0.0.0.0", enable_queue=False)
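
# A minimal alternative sketch, assuming the commented-out transformers pipeline
# above were used instead of Aniemore's EmotionFromVoice: the "audio-classification"
# pipeline returns a score-sorted list of {"label", "score"} dicts for a given file
# path, so transcribe() could return the top-scoring emotion label. Kept commented
# out, like the original pipeline line, so it does not override the setup above.
#
# classifier = pipeline(
#     "audio-classification",
#     model="Aniemore/wav2vec2-xlsr-53-russian-emotion-recognition",
#     trust_remote_code=True,
# )
#
# def transcribe(audio):
#     predictions = classifier(audio)   # e.g. [{"label": "anger", "score": 0.91}, ...]
#     return predictions[0]["label"]    # highest-scoring emotion as plain text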