# nelanbu's picture
# some fixes
# 4ccb2e7
import os
from huggingface_hub import notebook_login
import gradio as gr
from transformers import pipeline
from deep_translator import GoogleTranslator
from gtts import gTTS
import shutil
from pytube import YouTube
# Speech-recognition pipeline shared by every request; it is constructed once,
# when the module is imported, from the nelanbu/ID2223_Lab2_Whisper checkpoint.
pipe = pipeline(
    "automatic-speech-recognition",
    model="nelanbu/ID2223_Lab2_Whisper",
    # Chunking settings: 30-second chunks with a (4, 2) stride, processed in
    # batches of 16.
    chunk_length_s=30,
    batch_size=16,
    stride_length_s=(4, 2),
    # Leave device placement to the library.
    device_map="auto",
)
# Maps the language names shown in the UI to the language code that is passed
# to both GoogleTranslator and gTTS.
_LANGUAGE_CODES = {
    "english": "en",
    "swedish": "sv",
    "italian": "it",
    "german": "de",
    "spanish": "es",
    "french": "fr",
    "danish": "da",
    "russian": "ru",
    "japanese": "ja",
    # Both spellings are accepted so that a plain "chinese" selection no
    # longer leaves the target language undefined.
    "chinese": "zh-cn",
    "chinese (simplified)": "zh-cn",
}


def transcribe(audio_input, link_input, lang):
    """Transcribe audio, translate the transcript and speak the translation.

    Args:
        audio_input: Path of a local audio file. Only used when
            ``link_input`` is empty.
        link_input: YouTube URL. When given it takes precedence over
            ``audio_input``; the first audio-only stream, ordered by ``abr``
            descending, is downloaded and transcribed.
        lang: Name of the target language; must be a key of
            ``_LANGUAGE_CODES``.

    Returns:
        A ``(text, translated_text, output_audio_path)`` tuple holding the
        transcript, its translation and the path of the generated MP3.

    Raises:
        ValueError: If ``lang`` is not supported, if neither a URL nor an
            audio file is supplied, or if the video has no audio stream.
        Exception: Any other failure from the download, transcription,
            translation or speech synthesis steps is printed and re-raised.
    """
    try:
        # Validate the cheap inputs first so a bad request fails before any
        # download or model inference is started.
        target_lang = _LANGUAGE_CODES.get(lang)
        if target_lang is None:
            raise ValueError(f"Unsupported target language: {lang!r}")

        link = link_input.strip() if link_input else ""
        if link:
            audio_streams = (
                YouTube(link).streams.filter(only_audio=True).order_by("abr").desc()
            )
            best_audio = audio_streams.first()
            if best_audio is None:
                raise ValueError(f"No audio stream found for {link!r}")
            # The value returned by download() is what gets transcribed.
            audio = best_audio.download()
        elif audio_input:
            audio = audio_input
        else:
            raise ValueError(
                "Provide a YouTube URL or an audio recording/file to translate."
            )

        result = pipe(audio)
        text = result["text"]
        print(f"Transcribed text: {text}")

        translator = GoogleTranslator(source="auto", target=target_lang)
        translated_text = translator.translate(text)
        print(f"Translated text: {translated_text}")

        # Convert translated text to speech
        tts = gTTS(text=translated_text, tld="com", slow=False, lang=target_lang)
        # NOTE(review): the output path is fixed, so every call writes to the
        # same file.
        output_audio_path = "test2.mp3"
        tts.save(output_audio_path)
        print(f"Saved TTS audio to {output_audio_path}")

        return text, translated_text, output_audio_path
    except Exception as e:
        # Log for the server console, then let the caller see the failure.
        print(f"An error occurred: {e}")
        raise
# Gradio UI: an audio input, a YouTube URL box and a target-language dropdown
# feed transcribe(); its three return values fill the three outputs in order.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Translate from microphone/MP3 input"),
        gr.Text(max_lines=1, label="Translate from YouTube URL"),
        gr.Dropdown(
            # Each choice is passed verbatim to transcribe() as `lang`, so the
            # spelling here has to be one that transcribe() recognises.
            [
                "english",
                "swedish",
                "italian",
                "german",
                "spanish",
                "french",
                "danish",
                "russian",
                "japanese",
                "chinese (simplified)",
            ],
            label="Language",
            info="pick the language you want to translate your audio",
        ),
    ],
    outputs=[
        gr.Text(label="transcribed text"),
        gr.Text(label="translated text"),
        gr.Audio(type="filepath", label="translated audio"),
    ],
    title="Turkish Audio Translator",
    description="You can upload YouTube link of a video in Turkish or use the microphone to record your voice or upload an MP3 file to translate Turkish audio to other languages.",
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)