Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import pipeline | |
# Load the ASR and text-translation pipelines once, at import time, so every
# request reuses the same model instances.
#
# chunk_length_s turns on chunked long-form transcription. Without it, audio
# longer than Whisper's 30-second input window (easy to hit with an uploaded
# file) is not transcribed in full. 15 seconds is the chunk length the
# Distil-Whisper model card recommends for this model family.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
    chunk_length_s=15,
)
translator = pipeline(task="translation", model="facebook/nllb-200-distilled-600M")
| # Transcribe speech: first obtain the transcription as text, then feed that text to the translator pipeline. | |
def transcribe_speech(filepath, target_language):
    """Transcribe an audio file and translate the transcript.

    Args:
        filepath: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) means no audio was supplied.
        target_language: Language code forwarded to the translator as
            ``tgt_lang`` (for example ``"fra_Latn"``). The source language
            is fixed to ``"eng_Latn"``.

    Returns:
        A ``(transcription, translation)`` tuple of strings. When no audio
        was supplied, the first item is a retry message and the second is
        an empty string.
    """
    if not filepath:
        # Both return values are rendered in text boxes, so the placeholder
        # translation must be a string; a list would be shown as "[]".
        return "No audio found, please retry.", ""

    # Transcribe the speech.
    transcription = asr_pipeline(filepath)["text"]

    # Translate the transcribed text.
    translated = translator(
        transcription,
        src_lang="eng_Latn",
        tgt_lang=target_language,
    )
    return transcription, translated[0]["translation_text"]
# Gradio interfaces for microphone and file upload.

# Target-language codes offered in the dropdown, written exactly as they are
# passed to the translator. The first entry is the default selection.
_TARGET_LANGUAGES = ("ben_Beng", "hin_Deva", "fra_Latn", "spa_Latn", "deu_Latn")


def _build_interface(audio_source):
    """Return a transcribe-and-translate ``gr.Interface`` for one audio source.

    Args:
        audio_source: Value passed to ``gr.Audio(sources=...)``; this file
            uses ``"microphone"`` and ``"upload"``.

    Returns:
        A ``gr.Interface`` wired to ``transcribe_speech`` with an audio
        input, a target-language dropdown, and two text outputs
        (transcription and translation). Each call creates fresh component
        instances, so the returned interfaces share no widgets.
    """
    return gr.Interface(
        fn=transcribe_speech,
        inputs=[
            gr.Audio(sources=audio_source, type="filepath"),
            gr.Dropdown(
                label="Target Language",
                # Pass a new list per component so the constant is never shared.
                choices=list(_TARGET_LANGUAGES),
                value=_TARGET_LANGUAGES[0],
            ),
        ],
        outputs=[
            gr.Textbox(label="Transcription", lines=3),
            gr.Textbox(label="Translation", lines=5),
        ],
        allow_flagging="never",
    )


mic_transcribe = _build_interface("microphone")
file_transcribe = _build_interface("upload")
| # Choices are presented exactly as the model expects them. The language codes are provided under "Languages in FLORES-200" at the link below: | |
| # https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200 | |
# Assemble the demo: one tab per input mode (microphone, file upload).
with gr.Blocks() as demo:
    gr.TabbedInterface(
        interface_list=[mic_transcribe, file_transcribe],
        tab_names=["Transcribe Microphone", "Transcribe Audio File"],
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()