GxPx committed on
Commit
3085a10
1 Parent(s): ff85b55

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import os

import argostranslate.package
import argostranslate.translate
import streamlit as st
import torch
import whisper
from bs4 import BeautifulSoup
from moviepy.editor import VideoFileClip
from pytube import YouTube
10
+
11
# Display name -> translation target code. Insertion order is also the order
# in which the options appear in the language drop-down.
LANGUAGE_CODES = {
    "Arabic": "ar",
    "Chinese": "zh",
    "French": "fr",
    "German": "de",
    "Hindi": "hi",
    "Italian": "it",
    "Japanese": "ja",
    "Polish": "pl",
    "Russian": "ru",
    "Spanish": "es",
}

st.title("YouTube Video Translator")

# The URL field lives inside a form; the language selector is a plain
# top-level widget rendered right after it.
form = st.form("input URL")
url = form.text_input(
    'YouTube Video URL:',
    value='',
    placeholder='Paste your video URL here',
)
selected_language = st.selectbox(
    "Select your desired language:",
    list(LANGUAGE_CODES),
)

# Falls back to an empty code if the selection is somehow not in the table.
to_code = LANGUAGE_CODES.get(selected_language, '')

form.form_submit_button("Proceed")
38
+
39
+
40
def _format_timestamp(seconds):
    """Format an offset in seconds as ``HH:MM:SS.mmm``."""
    # Work in whole milliseconds so a value such as 59.9996 carries into the
    # minutes field instead of being rendered as "60.000".
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


def get_videotext(url, to_code):
    """Download a YouTube video, transcribe it and translate the transcript.

    The video is saved under ``downloaded_videos/``, its audio track is
    written as a WAV file under ``audio/``, the audio is transcribed with the
    Whisper "base" model, and every transcript segment is translated with
    Argos Translate. The source language code is fixed to ``"en"``.

    Args:
        url: URL of the YouTube video.
        to_code: Target language code passed to Argos Translate (e.g. "fr").

    Returns:
        A tuple ``(time_results, text_results, translated_text_results)`` of
        three parallel lists: segment start times formatted as
        ``HH:MM:SS.mmm``, the transcribed segment texts, and their
        translations.

    Raises:
        ValueError: If the video has no MP4 stream, the downloaded file has
            no audio track, or no translation package exists for the
            requested language pair.
    """
    print("to_Code: ", to_code)
    download_dir_video = "downloaded_videos"
    os.makedirs(download_dir_video, exist_ok=True)
    # Nothing in this function writes here; the directory is still created so
    # the on-disk layout stays what it has always been.
    download_dir_captions = "downloaded_captions"
    os.makedirs(download_dir_captions, exist_ok=True)
    audio_dir = "audio"
    os.makedirs(audio_dir, exist_ok=True)

    yt = YouTube(url)
    stream = yt.streams.filter(file_extension='mp4').first()
    print("yt: ", yt.streams.first())
    if stream is None:
        raise ValueError(f"No MP4 stream available for {url!r}")

    # Use the path returned by the download itself. Listing the directory and
    # taking the first entry can pick up a file left by an earlier request.
    video_file_path = stream.download(output_path=download_dir_video)
    audio_file_name = os.path.splitext(os.path.basename(video_file_path))[0]
    audio_file_path = os.path.join(audio_dir, f"{audio_file_name}.wav")

    # Extract the audio track; always close the clip so its reader
    # resources are released even if the export fails.
    clip = VideoFileClip(video_file_path)
    try:
        if clip.audio is None:
            raise ValueError(f"Downloaded video has no audio track: {video_file_path}")
        clip.audio.write_audiofile(audio_file_path)
    finally:
        clip.close()

    # Audio to text.
    model = whisper.load_model("base")
    result = model.transcribe(audio_file_path)
    segments = result.get("segments", [])

    time_results = [
        _format_timestamp(segment.get("start", 0.0)) for segment in segments
    ]
    text_results = [segment.get("text", "").strip() for segment in segments]

    from_code = "en"

    # Fetch and install the language package for this pair.
    argostranslate.package.update_package_index()
    available_packages = argostranslate.package.get_available_packages()
    package_to_install = next(
        (
            package
            for package in available_packages
            if package.from_code == from_code and package.to_code == to_code
        ),
        None,
    )
    if package_to_install is None:
        # Raise a descriptive error rather than leaking a bare StopIteration.
        raise ValueError(
            f"No translation package available for {from_code!r} -> {to_code!r}"
        )
    argostranslate.package.install_from_path(package_to_install.download())

    translated_text_results = [
        argostranslate.translate.translate(text, from_code, to_code)
        for text in text_results
    ]

    return time_results, text_results, translated_text_results
97
+
98
print(url)

# Only run the pipeline and render results once a URL has been entered;
# without this guard the result lists below would be undefined.
if url:
    with st.spinner(f'Please wait while we create translations in {selected_language}...'):
        time_results, text_results, translated_text_results = get_videotext(url, to_code)

    table_style = """
    <style>
    table {
        width: 100%;
        border-collapse: collapse;
    }
    th, td {
        padding: 8px;
        text-align: left;
    }
    th {
        background-color: #f2f2f2;
        color: black;
    }
    </style>
    """

    table_header = "<tr><th>TimeStamp</th><th>Original</th><th>Translated</th></tr>"

    # The transcript and its translation come from the video's audio, so they
    # are escaped before being placed into markup rendered as raw HTML.
    table_rows = "".join(
        f"<tr><td>{html.escape(timestamp)}</td>"
        f"<td>{html.escape(text)}</td>"
        f"<td>{html.escape(translated_text)}</td></tr>"
        for timestamp, text, translated_text in zip(
            time_results, text_results, translated_text_results
        )
    )

    table_html = f"<table>{table_header}{table_rows}</table>"

    st.write(table_style, unsafe_allow_html=True)
    st.write(table_html, unsafe_allow_html=True)