Spaces:
Runtime error
Runtime error
File size: 4,366 Bytes
3085a10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import streamlit as st
import os
from pytube import YouTube
from bs4 import BeautifulSoup
from moviepy.editor import VideoFileClip
import whisper
import argostranslate.package
import argostranslate.translate
import torch
st.title("YouTube Video Translator")
form = st.form("input URL")
url = form.text_input('YouTube Video URL:',value='',placeholder='Paste your video URL here')
selected_language = st.selectbox("Select your desired language:", ["Arabic", "Chinese", "French", "German", "Hindi", "Italian", "Japanese", "Polish", "Russian", "Spanish"])
to_code = ''
if selected_language == "Arabic":
to_code = "ar"
elif selected_language == "Chinese":
to_code = "zh"
elif selected_language == "French":
to_code = "fr"
elif selected_language == "German":
to_code = "de"
elif selected_language == "Hindi":
to_code = "hi"
elif selected_language == "Italian":
to_code = "it"
elif selected_language == "Japanese":
to_code = "ja"
elif selected_language == "Polish":
to_code = "pl"
elif selected_language == "Russian":
to_code = "ru"
elif selected_language == "Spanish":
to_code = "es"
form.form_submit_button("Proceed")
def get_videotext(url, to_code):
print("to_Code: ", to_code)
download_dir_video = "downloaded_videos"
os.makedirs(download_dir_video, exist_ok=True)
download_dir_captions = "downloaded_captions"
os.makedirs(download_dir_captions, exist_ok=True)
yt = YouTube(url)
video = yt.streams.filter(file_extension='mp4').first()
print("yt: ", yt.streams.first())
video.download(output_path=download_dir_video)
#get audio
audio_dir = "audio"
os.makedirs(audio_dir, exist_ok=True)
video_filename = os.listdir(download_dir_video)[0]
audio_file_name = os.path.splitext(video_filename)[0]
video_file_path = os.path.join(download_dir_video, video_filename)
audio_file_path = os.path.join(audio_dir, f"{audio_file_name}.wav")
video = VideoFileClip(video_file_path)
video.audio.write_audiofile(audio_file_path)
##audio to text
model = whisper.load_model("base")
result = model.transcribe(os.path.join(audio_dir, f"{audio_file_name}.wav"))
segments = result.get("segments", [])
time_results = []
text_results = []
for segment in segments:
start_time_sec = segment.get("start", 0.0)
start_time_formatted = "{:02d}:{:02d}:{:06.3f}".format(
int(start_time_sec // 3600),
int((start_time_sec % 3600) // 60),
start_time_sec % 60
)
time_results.append(start_time_formatted)
text_results.append(segment.get("text", "").strip())
from_code = "en"
# to_code = "fr"
argostranslate.package.update_package_index()
available_packages = argostranslate.package.get_available_packages()
package_to_install = next(
filter(
lambda x: x.from_code == from_code and x.to_code == to_code, available_packages
)
)
argostranslate.package.install_from_path(package_to_install.download())
translated_text_results = [argostranslate.translate.translate(text, from_code, to_code) for text in text_results]
return time_results, text_results, translated_text_results
print(url)
column_width = 100
with st.spinner(f'Please wait while we create translations in {selected_language}...'):
if url:
time_results, text_results, translated_text_results= get_videotext(url, to_code)
table_style = """
<style>
table {
width: 100%;
border-collapse: collapse;
}
th, td {
padding: 8px;
text-align: left;
}
th {
background-color: #f2f2f2;
color: black;
}
</style>
"""
table_header = "<tr><th>TimeStamp</th><th>Original</th><th>Translated</th></tr>"
table_rows = "".join(f"<tr><td>{time}</td><td>{text}</td><td>{translated_text}</td></tr>"
for time, text, translated_text in zip(time_results, text_results, translated_text_results))
table_html = f"<table>{table_header}{table_rows}</table>"
st.write(table_style, unsafe_allow_html=True)
st.write(table_html, unsafe_allow_html=True) |