import uuid

import gradio as gr
from gtts import gTTS
from transformers import pipeline

# Speech-to-text and grammar-correction pipelines.
asr = pipeline(
    "automatic-speech-recognition",
    "facebook/wav2vec2-conformer-rope-large-960h-ft",
)
corrector = pipeline("text2text-generation", model="pszemraj/grammar-synthesis-small")


def transcribe(audio):
    """Transcribe an audio file and lowercase the result."""
    return asr(audio)["text"].lower()


def to_audio(text):
    """Convert text to speech and return the path of the saved MP3."""
    audio_path = f"/tmp/{uuid.uuid4()}.mp3"
    tts = gTTS(text, tld="us")
    tts.save(audio_path)
    return audio_path


def transcription(audio, history):
    """Handle a microphone recording: show the audio, its transcript, and grammar suggestions."""
    if audio:
        message = transcribe(audio)
        history.append(((audio,), message))
        results = corrector(message)
        results = "\n".join(t["generated_text"] for t in results)
        history.append((None, f"**[Grammar and examples]**\n{results}"))
    return history


def chat(message, history):
    """Handle a typed message: synthesize speech for it and show grammar suggestions."""
    audio_path = to_audio(message)
    history.append((message, (audio_path,)))
    results = corrector(message)
    results = "\n".join(t["generated_text"] for t in results)
    history.append((None, f"**[Grammar and examples]**\n{results}"))
    return None, history


with gr.Blocks(theme=gr.themes.Soft()) as learning:
    gr.Markdown("# This app helps English learners speak fluently.")
    chatbot = gr.Chatbot()
    with gr.Row():
        message = gr.Textbox(label="Send your message to TTS")
        microphone = gr.Audio(label="Transcribe", source="microphone", type="filepath")

    # When a recording finishes, transcribe it, then clear the microphone widget.
    microphone.change(transcription, [microphone, chatbot], [chatbot])
    microphone.change(lambda: None, None, microphone)
    # Typed messages are converted to speech and corrected, then the textbox is cleared.
    message.submit(chat, [message, chatbot], [message, chatbot])

learning.launch()