# This code runs Whisper on the CPU.
import whisper
import gradio as gr
import googletrans
from googletrans import Translator

model = whisper.load_model("small")
translator = Translator()

# googletrans.LANGUAGES maps language codes to lowercase language names,
# e.g. {"fr": "french", ...}; the dropdown shows the names and we map a
# selected name back to its code before translating.
lan = googletrans.LANGUAGES
keys = list(lan.keys())
vals = list(lan.values())


def transcribe(lang, audio):
    print(lang)
    # Load the audio and pad/trim it to fit 30 seconds.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Make a log-Mel spectrogram and move it to the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language.
    _, probs = model.detect_language(mel)
    # print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio (fp16 is disabled because the model runs on the CPU).
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    # Translate the transcription into the selected destination language.
    lang = lang.lower()
    return translator.translate(result.text, dest=keys[vals.index(lang)]).text


def clear(msg):
    return ""


with gr.Blocks() as demo:
    # Only used by the disabled streaming wiring below.
    state = gr.State(value="")
    # Gradio 4.x renames the source argument to sources=["microphone"].
    audio = gr.Audio(label="Press start recording to speak", source="microphone", type="filepath")
    dropdown = gr.Dropdown(label="First select the destination language", choices=vals)
    msg = gr.Textbox()
    clearBTN = gr.Button("Clear")

    # Record a clip first, then pick (or re-pick) a language in the dropdown
    # to trigger transcription and translation of the recording.
    dropdown.select(transcribe, [dropdown, audio], outputs=[msg])
    # Alternative wiring (left disabled): stream / stop_recording events that
    # accumulate text in the state string.
    # audio.stream(transcribe, [dropdown, audio, state], outputs=[msg, state])
    # audio.stop_recording(clear, [state], outputs=[msg, state])
    clearBTN.click(clear, [msg], outputs=[msg])

demo.launch(share=True)
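
# Rough usage notes (assumptions, adjust to your environment):
#   pip install -U openai-whisper gradio googletrans==4.0.0rc1
#   (openai-whisper also needs ffmpeg on the PATH to load audio files; the
#   googletrans pin is an assumption, since other releases expose a different,
#   e.g. async, translate API.)
#
# To sanity-check the pipeline without the UI, a call like the following
# should work, assuming a short recording named "sample.wav" exists locally:
#
#   print(transcribe("french", "sample.wav"))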