"""Gradio app: transcribe audio, translate the text, and speak the translation.

Audio comes either from a microphone/file upload or from a YouTube URL. It is
transcribed with the ``nelanbu/ID2223_Lab2_Whisper`` ASR pipeline, translated
with Google Translate (via ``deep_translator``) and synthesized with gTTS.
"""

import os
import shutil

import gradio as gr
from deep_translator import GoogleTranslator
from gtts import gTTS
from huggingface_hub import notebook_login
from pytube import YouTube
from transformers import pipeline

# Loaded once at import time so every request reuses the same model.
pipe = pipeline(
    "automatic-speech-recognition",
    model="nelanbu/ID2223_Lab2_Whisper",
    chunk_length_s=30,
    batch_size=16,
    stride_length_s=(4, 2),
    device_map="auto",
)

# Maps the dropdown labels to the language codes passed to both
# GoogleTranslator and gTTS. "chinese" is what the dropdown offers;
# "chinese (simplified)" is kept for backward compatibility with callers that
# used the older label.
_LANGUAGE_CODES = {
    "english": "en",
    "swedish": "sv",
    "italian": "it",
    "german": "de",
    "spanish": "es",
    "french": "fr",
    "danish": "da",
    "russian": "ru",
    "japanese": "ja",
    "chinese": "zh-CN",
    "chinese (simplified)": "zh-CN",
}

# NOTE(review): a fixed output path means concurrent requests overwrite each
# other's audio file; kept as-is to preserve the existing on-disk behavior.
_OUTPUT_AUDIO_PATH = "test2.mp3"


def _resolve_target_language(lang):
    """Return the language code for dropdown label *lang* or raise gr.Error."""
    key = (lang or "").strip().lower()
    try:
        return _LANGUAGE_CODES[key]
    except KeyError:
        raise gr.Error(
            "Please pick a supported target language."
        ) from None


def _resolve_audio_source(audio_input, link_input):
    """Return a local audio file path, preferring the YouTube link if given."""
    link = (link_input or "").strip()
    if link:
        audio_streams = (
            YouTube(link).streams.filter(only_audio=True).order_by("abr").desc()
        )
        best_audio = audio_streams.first()
        if best_audio is None:
            raise gr.Error("No audio stream was found for this YouTube URL.")
        return best_audio.download()
    if audio_input:
        return audio_input
    raise gr.Error(
        "Please record or upload audio, or provide a YouTube URL."
    )


def transcribe(audio_input, link_input, lang):
    """Transcribe audio, translate the transcript and synthesize it as speech.

    Args:
        audio_input: Path to a recorded/uploaded audio file, or a falsy value
            when none was provided.
        link_input: YouTube URL; takes precedence over ``audio_input`` when
            non-empty.
        lang: Target language label, one of the keys of ``_LANGUAGE_CODES``
            (case-insensitive).

    Returns:
        A tuple ``(text, translated_text, output_audio_path)`` with the
        transcript, its translation and the path of the generated MP3.

    Raises:
        gr.Error: If no input was given, the language is unsupported, the
            video has no audio stream, or nothing was transcribed.
        Exception: Any error from the ASR pipeline, translator or TTS is
            logged and re-raised unchanged.
    """
    try:
        # Validate cheap inputs first so a bad language choice does not cost a
        # full download and transcription.
        target_lang = _resolve_target_language(lang)
        audio = _resolve_audio_source(audio_input, link_input)

        result = pipe(audio)
        text = result["text"]
        print(f"Transcribed text: {text}")
        if not text or not text.strip():
            raise gr.Error("No speech could be transcribed from the audio.")

        translator = GoogleTranslator(source="auto", target=target_lang)
        translated_text = translator.translate(text)
        print(f"Translated text: {translated_text}")

        # Convert translated text to speech
        tts = gTTS(text=translated_text, tld="com", slow=False, lang=target_lang)
        output_audio_path = _OUTPUT_AUDIO_PATH
        tts.save(output_audio_path)
        print(f"Saved TTS audio to {output_audio_path}")

        return text, translated_text, output_audio_path
    except Exception as e:
        # Top-level request boundary: log for the server console, then let
        # Gradio surface the error to the user.
        print(f"An error occurred: {e}")
        raise


demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Translate from microphone/MP3 input"),
        gr.Text(max_lines=1, label="Translate from YouTube URL"),
        gr.Dropdown(
            [
                "english",
                "swedish",
                "italian",
                "german",
                "spanish",
                "french",
                "danish",
                "russian",
                "japanese",
                "chinese",
            ],
            label="Language",
            info="pick the language you want to translate your audio",
        ),
    ],
    outputs=[
        gr.Text(label="transcribed text"),
        gr.Text(label="translated text"),
        gr.Audio(type="filepath", label="translated audio"),
    ],
    title="Turkish Audio Translator",
    description="You can upload YouTube link of a video in Turkish or use the microphone to record your voice or upload an MP3 file to translate Turkish audio to other languages.",
)

if __name__ == "__main__":
    demo.launch(debug=True)