"""Gradio demo: transcribe speech with Whisper and classify its sentiment.

Audio arrives from the microphone or from a file upload, is transcribed by
the Whisper model loaded below, and the resulting text is passed to a
Hugging Face sentiment-analysis pipeline.
"""
import whisper
import gradio as gr
from transformers import pipeline

# Both models are created at import time and shared by every request.
model = whisper.load_model("base")
sentiment_analysis = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)


def process_audio_file(file, sampling_rate=16000):
    """Decode an audio file into a waveform via ffmpeg.

    Args:
        file: Path of the audio file to read.
        sampling_rate: Rate (Hz) the audio is resampled to while decoding.
            Defaults to 16000, the rate Whisper works with.

    Returns:
        Whatever ``ffmpeg_read`` produces for the file's bytes (a NumPy
        waveform according to the transformers documentation).
    """
    # Imported locally: the original referenced ``ffmpeg_read`` without ever
    # importing it, so any call raised NameError.
    from transformers.pipelines.audio_utils import ffmpeg_read

    with open(file, "rb") as f:
        inputs = f.read()
    return ffmpeg_read(inputs, sampling_rate)


def transcribe(Microphone, File_Upload):
    """Transcribe the supplied audio and return its sentiment.

    The parameter names are kept as-is because they are part of the
    function's interface as seen by the Gradio ``Interface`` below.

    Args:
        Microphone: File path of the microphone recording, or ``None``.
        File_Upload: File path of the uploaded audio, or ``None``.

    Returns:
        A ``(text, score)`` pair, one value per output component:
        ``text`` is the predicted sentiment label (preceded by a warning
        line when both inputs were supplied) or an error message, and
        ``score`` is the classifier's confidence, or ``None`` on error.
    """
    if Microphone is None and File_Upload is None:
        # Two output components are declared, so two values must be returned.
        return (
            "ERROR: You have to either use the microphone or upload an audio file",
            None,
        )

    warn_output = ""
    if Microphone is not None and File_Upload is not None:
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the "
            "uploaded audio will be discarded.\n"
        )

    # The microphone recording takes precedence over the upload.
    file = Microphone if Microphone is not None else File_Upload

    # task="translate" asks Whisper for English output, which is what the
    # English-only sentiment model needs.
    result = model.transcribe(file, task="translate")

    # The pipeline returns a list with one {"label", "score"} dict per input
    # string; truncation keeps long transcripts within the model's limit.
    prediction = sentiment_analysis(result["text"], truncation=True)[0]
    return warn_output + prediction["label"], prediction["score"]


# NOTE(review): the title, description and output labels below still describe
# language identification, while the app returns a sentiment label and its
# score. The strings are left unchanged here; confirm the intended wording.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type='filepath', optional=True),
        gr.inputs.Audio(source="upload", type='filepath', optional=True),
    ],
    outputs=[
        gr.outputs.Textbox(label="Language"),
        gr.Number(label="Probability"),
    ],
    layout="horizontal",
    theme="huggingface",
    title="Whisper Language Identification",
    description="Demo for Language Identification using OpenAI's [Whisper Large V2](https://huggingface.co/openai/whisper-large-v2).",
    allow_flagging='never',
)

iface.launch(enable_queue=True)