import uuid

import gradio as gr
from gtts import gTTS
from transformers import pipeline

# Speech-to-text and grammar-correction pipelines.
asr = pipeline(
    "automatic-speech-recognition",
    "facebook/wav2vec2-conformer-rope-large-960h-ft",
)
corrector = pipeline("text2text-generation", model="pszemraj/grammar-synthesis-small")


def transcribe(audio):
    """Transcribe an audio file and lowercase the result."""
    return asr(audio)["text"].lower()


def to_audio(text):
    """Convert text to speech and return the path of the saved MP3."""
    audio_path = f"/tmp/{uuid.uuid4()}.mp3"
    tts = gTTS(text, tld="us")
    tts.save(audio_path)
    return audio_path


def transcription(audio, history):
    """Handle a microphone recording: show the audio, its transcript, and grammar suggestions."""
    if audio:
        message = transcribe(audio)
        history.append(((audio,), message))
        results = corrector(message)
        results = "\n".join(t["generated_text"] for t in results)
        history.append((None, f"**[Grammar and examples]**\n{results}"))
    return history


def chat(message, history):
    """Handle a typed message: synthesize speech for it and show grammar suggestions."""
    audio_path = to_audio(message)
    history.append((message, (audio_path,)))
    results = corrector(message)
    results = "\n".join(t["generated_text"] for t in results)
    history.append((None, f"**[Grammar and examples]**\n{results}"))
    return None, history


with gr.Blocks(theme=gr.themes.Soft()) as learning:
    gr.Markdown("# This app helps English learners speak fluently.")
    chatbot = gr.Chatbot()
    with gr.Row():
        message = gr.Textbox(label="Send your message to TTS")
        microphone = gr.Audio(label="Transcribe", source="microphone", type="filepath")

    # When a recording finishes, transcribe it, then clear the microphone widget.
    microphone.change(transcription, [microphone, chatbot], [chatbot])
    microphone.change(lambda: None, None, microphone)
    # Typed messages are converted to speech and corrected, then the textbox is cleared.
    message.submit(chat, [message, chatbot], [message, chatbot])

learning.launch()