"""Gradio page that transcribes an uploaded '.wav' file to text with Whisper.

Six-channel recordings are first folded down to stereo; any other channel
layout is passed to the model unchanged.
"""

import functools
import os
import tempfile

import gradio as gr
import whisper
from pydub import AudioSegment

# Gain reduction (in dB) applied to the center and back channels before they
# are mixed into the front channels.
_SURROUND_ATTENUATION_DB = 6

# Kept at column 0 so Markdown never treats the text as an indented code block.
_PAGE_DESCRIPTION = '''
This space transcribes the spoken words from an audio file to text.

## How to use this Space?
- Upload a '.wav' file.
- The transcription of the audio will be shown after you click the transcribe button.

## Examples
- You can get the test examples from our [Roop Dataset Repo.](https://huggingface.co/datasets/SJTU-TES/WAV2COM)
'''


def convert_6ch_wav_to_stereo(input_file_path, output_file_path):
    """Write a stereo (or untouched) copy of a WAV file to ``output_file_path``.

    If the input has exactly six channels it is down-mixed to two: the center
    and the matching back channel are attenuated and overlaid onto each front
    channel. Input with any other channel count is re-exported as-is.

    Args:
        input_file_path: Path of the source WAV file.
        output_file_path: Path the resulting WAV file is written to.
    """
    sound = AudioSegment.from_file(input_file_path, format="wav")

    if sound.channels != 6:
        # export() hands back an open file object; close it so the handle is
        # not leaked and the file can be removed afterwards.
        sound.export(output_file_path, format="wav").close()
        return

    # Split once instead of once per channel.
    # NOTE(review): assumes the channel order is FL, FR, C, LFE, BL, BR, as
    # the original indexing implied; the fourth channel is not used.
    front_left, front_right, center, _unused, back_left, back_right = (
        sound.split_to_mono()
    )

    center = center - _SURROUND_ATTENUATION_DB
    back_left = back_left - _SURROUND_ATTENUATION_DB
    back_right = back_right - _SURROUND_ATTENUATION_DB

    stereo_left = front_left.overlay(center).overlay(back_left)
    stereo_right = front_right.overlay(center).overlay(back_right)

    stereo_sound = AudioSegment.from_mono_audiosegments(stereo_left, stereo_right)
    stereo_sound.export(output_file_path, format="wav").close()


@functools.lru_cache(maxsize=1)
def _load_whisper_model():
    """Load the "medium" Whisper model on the CPU once and reuse it."""
    return whisper.load_model("medium", device="cpu")


def judge_command(file_path):
    """Transcribe the English speech in a WAV file and return the text.

    The audio is converted with ``convert_6ch_wav_to_stereo`` into a scratch
    file inside a per-call temporary directory, so simultaneous requests do
    not overwrite each other's intermediate file.

    Args:
        file_path: Path of the WAV file to transcribe.

    Returns:
        The transcribed text (also printed to stdout).
    """
    whisper_model = _load_whisper_model()

    with tempfile.TemporaryDirectory() as work_dir:
        out_path = os.path.join(work_dir, "out.wav")
        convert_6ch_wav_to_stereo(file_path, out_path)
        result = whisper_model.transcribe(out_path, language="en")

    text_result = result["text"]
    print(text_result)
    return text_result


def handle_audio_transcription(file_path):
    """Run a transcription for the UI and never raise.

    Args:
        file_path: The value supplied by the upload component, or ``None``
            when nothing has been uploaded.

    Returns:
        A ``(message, text_result)`` tuple. ``message`` is a status line or
        the error text; ``text_result`` is the transcription, or ``""`` on
        failure.
    """
    if file_path is None:
        return "Please upload a '.wav' file first.", ""

    try:
        text_result = judge_command(file_path)
        message = "Transcription successful!"
    except Exception as e:  # UI boundary: report the failure instead of crashing.
        message = str(e)
        text_result = ""
    return message, text_result


with gr.Blocks() as audio_transcription_page:
    gr.Markdown(_PAGE_DESCRIPTION)
    with gr.Row():
        with gr.Column():
            audio_file = gr.File(
                file_types=[".wav"],
                label="Upload a '.wav' file",
            )
            info = gr.Textbox(
                value="",
                label="Log",
                placeholder="Transcription results will appear here...",
            )
            # The handler returns two values, so it needs two output
            # components: the status goes to "Log", the text goes here.
            transcription = gr.Textbox(
                value="",
                label="Transcription",
            )
            transcribe_button = gr.Button("Transcribe")
            transcribe_button.click(
                handle_audio_transcription,
                [audio_file],
                [info, transcription],
            )


if __name__ == "__main__":
    audio_transcription_page.launch(debug=True)