# Spaces: Sleeping (non-code page header; commented out so the file parses)
# This script is intended to run on CPU.
# NOTE: an alternative based on the transformers
# "automatic-speech-recognition" pipeline (model "openai/whisper-base.en")
# was tried earlier and is currently disabled in favour of the whisper
# package used below.
import time

import googletrans
import gradio as gr
import whisper
from googletrans import Translator

# Speech-to-text model, loaded once at import time and shared by all requests.
model = whisper.load_model("small")

# Translation client, also shared by all requests.
translator = Translator()

# googletrans' language table, split into two index-aligned lists:
# keys[i] is the language code whose display name is vals[i].
lan = googletrans.LANGUAGES
keys = list(lan)
vals = list(lan.values())
def transcribe(lang, audio):
    """Transcribe a recorded clip with Whisper and translate the text.

    Args:
        lang: Destination language name as offered by the dropdown (an
            entry of ``vals``). Matched case-insensitively.
        audio: Path of the recorded audio file, or ``None`` when nothing
            has been recorded yet.

    Returns:
        The translated transcription, or an empty string when either the
        recording or the language selection is missing.

    Raises:
        ValueError: If ``lang`` does not match any known language name.
    """
    print(lang)

    # The dropdown event can fire before a clip exists (audio is None) or
    # with no selection; return an empty result instead of crashing inside
    # whisper.load_audio / str.lower.
    if not audio or not lang:
        return ""

    # Resolve the language code up front so an invalid selection fails
    # before the expensive decoding step. Both sides are lower-cased so
    # that names stored with capital letters in the table still match.
    wanted = lang.lower()
    dest = next(
        (code for code, name in zip(keys, vals) if name.lower() == wanted),
        None,
    )
    if dest is None:
        raise ValueError(f"unsupported destination language: {lang!r}")

    # Load the audio and pad/trim it to fit 30 seconds.
    samples = whisper.load_audio(audio)
    samples = whisper.pad_or_trim(samples)

    # Build the log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # The separate model.detect_language(mel) call was dropped: its result
    # was never used, so it only added an extra forward pass.
    # fp16 is disabled -- presumably because this runs on CPU.
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options, fp16=False)

    return translator.translate(result.text, dest=dest).text
def clear(msg):
    """Return an empty string, ignoring ``msg``.

    Used as the Clear button callback so the output textbox is reset.
    """
    emptied = ""
    return emptied
# Build the UI: a microphone recorder, a destination-language dropdown, an
# output textbox and a button that empties the textbox.
with gr.Blocks() as demo:
    # Currently unused; kept from an earlier streaming variant that
    # accumulated translated text across audio chunks.
    state = gr.State(value="")

    audio = gr.Audio(
        label="press start record to speek",
        source="microphone",
        type="filepath",
    )
    dropdown = gr.Dropdown(
        label="first select the destination language",
        choices=vals,
    )
    msg = gr.Textbox()
    clearBTN = gr.Button("Clear")

    # Choosing a language triggers transcription + translation of the
    # recorded clip; the result is written to the textbox.
    dropdown.select(fn=transcribe, inputs=[dropdown, audio], outputs=[msg])

    # The Clear button replaces the textbox content with an empty string.
    clearBTN.click(fn=clear, inputs=[msg], outputs=[msg])

# share=True additionally requests a public share link from Gradio.
demo.launch(share=True)