# This code runs Whisper on the CPU.
import whisper
import gradio as gr
import googletrans
from googletrans import Translator

model = whisper.load_model("small")
translator = Translator()

# googletrans.LANGUAGES maps language codes to lowercase language names,
# e.g. {"fr": "french", ...}; the dropdown shows the names and we map a
# selected name back to its code before translating.
lan = googletrans.LANGUAGES
keys = list(lan.keys())
vals = list(lan.values())


def transcribe(lang, audio):
    print(lang)
    # Load the audio and pad/trim it to fit 30 seconds.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Make a log-Mel spectrogram and move it to the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language.
    _, probs = model.detect_language(mel)
    # print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio (fp16 is disabled because the model runs on the CPU).
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    # Translate the transcription into the selected destination language.
    lang = lang.lower()
    return translator.translate(result.text, dest=keys[vals.index(lang)]).text


def clear(msg):
    return ""


with gr.Blocks() as demo:
    # Only used by the disabled streaming wiring below.
    state = gr.State(value="")
    # Gradio 4.x renames the source argument to sources=["microphone"].
    audio = gr.Audio(label="Press start recording to speak", source="microphone", type="filepath")
    dropdown = gr.Dropdown(label="First select the destination language", choices=vals)
    msg = gr.Textbox()
    clearBTN = gr.Button("Clear")

    # Record a clip first, then pick (or re-pick) a language in the dropdown
    # to trigger transcription and translation of the recording.
    dropdown.select(transcribe, [dropdown, audio], outputs=[msg])
    # Alternative wiring (left disabled): stream / stop_recording events that
    # accumulate text in the state string.
    # audio.stream(transcribe, [dropdown, audio, state], outputs=[msg, state])
    # audio.stop_recording(clear, [state], outputs=[msg, state])
    clearBTN.click(clear, [msg], outputs=[msg])

demo.launch(share=True)
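
# Rough usage notes (assumptions, adjust to your environment):
#   pip install -U openai-whisper gradio googletrans==4.0.0rc1
#   (openai-whisper also needs ffmpeg on the PATH to load audio files; the
#   googletrans pin is an assumption, since other releases expose a different,
#   e.g. async, translate API.)
#
# To sanity-check the pipeline without the UI, a call like the following
# should work, assuming a short recording named "sample.wav" exists locally:
#
#   print(transcribe("french", "sample.wav"))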