Spaces:
Build error
Build error
| import nemo.collections.asr as nemo_asr | |
| from pydub import AudioSegment | |
| from io import BytesIO | |
| import tempfile | |
| import os | |
| import gradio as gr | |
def convert_to_mono(input_file):
    """Load an audio file and return it as single-channel WAV data.

    Args:
        input_file: The audio to load; passed straight to
            ``AudioSegment.from_file``.

    Returns:
        A ``BytesIO`` holding the WAV-encoded mono audio, positioned at
        offset 0.
    """
    # Decode the input and downmix to one channel in a single expression.
    mono_sound = AudioSegment.from_file(input_file).set_channels(1)

    # Encode as WAV into an in-memory buffer rather than a file on disk.
    wav_buffer = BytesIO()
    mono_sound.export(wav_buffer, format="wav")

    # Rewind so the caller reads from the start of the buffer.
    wav_buffer.seek(0)
    return wav_buffer
# Identifier of the pre-trained checkpoint passed to from_pretrained().
ASR_MODEL_NAME = "nvidia/stt_ka_fastconformer_hybrid_large_pc"

# Load the pre-trained ASR model once, at import time.
asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
    model_name=ASR_MODEL_NAME
)
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file with the module-level ASR model.

    The audio is converted to mono WAV, written to a temporary ``.wav``
    file, and that file's path is handed to ``asr_model.transcribe``. The
    temporary file is removed afterwards, even when writing or
    transcription fails.

    Args:
        audio_file: The uploaded audio, forwarded to ``convert_to_mono``;
            a falsy value means nothing was uploaded.

    Returns:
        ``res[0][0]`` of the model output, or ``None`` when ``audio_file``
        is falsy.
    """
    if not audio_file:
        return None

    # Convert first so no temporary file is created if decoding fails.
    mono_audio = convert_to_mono(audio_file)

    # delete=False keeps the file on disk after it is closed, so it can be
    # handed to the model by path; cleanup is done explicitly below.
    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    temp_file_path = temp_file.name
    try:
        # Close the file before transcribing so the data is fully flushed.
        with temp_file:
            temp_file.write(mono_audio.read())

        res = asr_model.transcribe([temp_file_path])
    finally:
        # Always remove the temporary file, including on errors.
        os.remove(temp_file_path)

    # NOTE(review): this indexing assumes transcribe() returns a nested
    # sequence — confirm against the installed NeMo version.
    return res[0][0]
| # def transcribe_audio(audio_file): | |
| # if audio_file: | |
| # # Convert the uploaded audio to mono | |
| # mono_audio = convert_to_mono(audio_file) | |
| # # Transcribe the audio using the BytesIO object directly | |
| # audio_data = mono_audio.read() | |
| # # Use the audio_data in the format expected by the ASR model | |
| # res = asr_model.transcribe([BytesIO(audio_data)]) | |
| # # Return the transcription result | |
| # return res[0][0] | |
# Audio input component; type="filepath" hands the callback a file path.
audio_input = gr.Audio(type="filepath")

# Wire the input to transcribe_audio and show the result as plain text.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs="text",
    title="ASR Transcription",
    description="Upload an audio file (mp3, wav, or m4a) and get the transcription.",
)

# Start the Gradio app.
interface.launch()