Files changed (1) hide show
  1. app.py +346 -341
app.py CHANGED
@@ -1,342 +1,347 @@
1
- # coding=utf8
2
- # Youtube Video Translator
3
- # Developed by Ruslan Magana Vsevolodovna
4
- # https://ruslanmv.com/
5
-
6
- # importing all necessary libraries
7
- import pathlib
8
- import sys, os
9
- from gtts import gTTS
10
- import gradio as gr
11
- import os
12
- import speech_recognition as sr
13
- from googletrans import Translator, constants
14
- from pprint import pprint
15
- from moviepy.editor import *
16
- from pytube import YouTube
17
- from youtube_transcript_api import YouTubeTranscriptApi
18
- from utils import *
19
-
20
- def download_video(url):
21
- print("Downloading...")
22
- local_file = (
23
- YouTube(url)
24
- .streams.filter(progressive=True, file_extension="mp4")
25
- .first()
26
- .download()
27
- )
28
- print("Downloaded")
29
- return local_file
30
-
31
- def validate_youtube(url):
32
- #This creates a youtube objet
33
- try:
34
- yt = YouTube(url)
35
- except Exception:
36
- print("Hi there URL seems invalid")
37
- return True
38
- #This will return the length of the video in sec as an int
39
- video_length = yt.length
40
- if video_length > 600:
41
- print("Your video is larger than 10 minutes")
42
- return True
43
- else:
44
- print("Your video is less than 10 minutes")
45
- return False
46
-
47
- def validate_url(url):
48
- import validators
49
- if not validators.url(url):
50
- print("Hi there URL seems invalid ")
51
- return True
52
- else:
53
- return False
54
-
55
-
56
- def cleanup():
57
- import pathlib
58
- import glob
59
- types = ('*.mp4', '*.wav') # the tuple of file types
60
- #Finding mp4 and wave files
61
- junks = []
62
- for files in types:
63
- junks.extend(glob.glob(files))
64
- try:
65
- # Deleting those files
66
- for junk in junks:
67
- print("Deleting",junk)
68
- # Setting the path for the file to delete
69
- file = pathlib.Path(junk)
70
- # Calling the unlink method on the path
71
- file.unlink()
72
- except Exception:
73
- print("I cannot delete the file because it is being used by another process")
74
-
75
- def getSize(filename):
76
- st = os.stat(filename)
77
- return st.st_size
78
-
79
-
80
- def clean_transcript(transcript_list):
81
- script = ""
82
- for text in transcript_list:
83
- t = text["text"]
84
- if( (t != '[music]') and \
85
- (t != '[Music]') and \
86
- (t != '[музыка]') and \
87
- (t != '[Музыка]') and \
88
- (t != '[musik]') and \
89
- (t != '[Musik]') and \
90
- (t != '[musica]') and \
91
- (t != '[Musica]') and \
92
- (t != '[música]') and \
93
- (t != '[Música]') and \
94
- (t != '[音楽]') and \
95
- (t != '[音乐]')
96
- ) :
97
- script += t + " "
98
- return script
99
-
100
-
101
- def get_transcript(url,desired_language):
102
- id_you= url[url.index("=")+1:]
103
- try:
104
- # retrieve the available transcripts
105
- transcript_list = YouTubeTranscriptApi.list_transcripts(id_you)
106
-
107
- except Exception:
108
- print('TranscriptsDisabled:')
109
- is_translated = False
110
- return " ", " ", is_translated
111
-
112
- lista=[]
113
- transcript_translation_languages=[]
114
- # iterate over all available transcripts
115
- for transcript in transcript_list:
116
- lista.extend([
117
- transcript.language_code,
118
- transcript.is_generated,
119
- transcript.is_translatable,
120
- transcript_translation_languages.append(transcript.translation_languages),
121
- ])
122
- print(lista)
123
- n_size=int(len(lista)/4)
124
- print("There are {} avialable scripts".format(n_size))
125
- import numpy as np
126
- matrix = np.array(lista)
127
- shape = (n_size,4)
128
- matrix=matrix.reshape(shape)
129
- matrix=matrix.tolist()
130
- is_manually=False
131
- is_automatic=False
132
- for lista in matrix:
133
- #print(lista)
134
- language_code=lista[0]
135
- is_generated=lista[1]
136
- is_translatable=lista[2]
137
- if not is_generated and is_translatable :
138
- print("Script found manually generated")
139
- is_manually=True
140
- language_code_man=language_code
141
- if is_generated and is_translatable :
142
- print("Script found automatic generated")
143
- is_automatic=True
144
- language_code_au=language_code
145
-
146
- if is_manually:
147
- # we try filter for manually created transcripts
148
- print('We extract manually created transcripts')
149
- transcript = transcript_list.find_manually_created_transcript([language_code])
150
-
151
- elif is_automatic:
152
- print('We extract generated transcript')
153
- # or automatically generated ones, but not translated
154
- transcript = transcript_list.find_generated_transcript([language_code])
155
- else:
156
- print('We try find the transcript')
157
- # we directly filter for the language you are looking for, using the transcript list
158
- transcript = transcript_list.find_transcript([language_code])
159
-
160
- is_translated = False
161
- if is_translatable :
162
- for available_trad in transcript_translation_languages[0]:
163
- if available_trad['language_code']==desired_language:
164
- print("It was found the translation for lang:",desired_language)
165
- print('We translate directly the transcript')
166
- transcript_translated = transcript.translate(desired_language)
167
- transcript_translated=transcript_translated.fetch()
168
- translated=clean_transcript(transcript_translated)
169
- is_translated = True
170
- script_translated = ""
171
- if is_translated :
172
- script_translated = translated
173
-
174
- transcript=transcript.fetch()
175
- script = clean_transcript(transcript)
176
-
177
- return script, script_translated, is_translated
178
-
179
- # Set environment variables
180
- home_dir = os.getcwd()
181
- temp_dir=os.path.join(home_dir, "temp")
182
- #Create temp directory
183
- pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)
184
- os.environ['home_dir'] = home_dir
185
- os.environ['temp_dir'] = temp_dir
186
-
187
- def video_to_translate(url,initial_language,final_language):
188
- print('Checking the url')
189
- check =validate_youtube(url)
190
- if check is True: return "./demo/tryagain2.mp4"
191
-
192
- #Internal definitions
193
- if initial_language == "English":
194
- lang_in='en-US'
195
- lang_api='en'
196
- elif initial_language == "Italian":
197
- lang_in='it-IT'
198
- lang_api='it'
199
- elif initial_language == "Spanish":
200
- lang_in='es-MX'
201
- lang_api='es'
202
- elif initial_language == "Russian":
203
- lang_in='ru-RU'
204
- lang_api='rus'
205
- elif initial_language == "German":
206
- lang_in='de-DE'
207
- lang_api='de'
208
- elif initial_language == "Japanese":
209
- lang_in='ja-JP'
210
- lang_api='ja'
211
- if final_language == "English":
212
- lang='en'
213
- elif final_language == "Italian":
214
- lang='it'
215
- elif final_language == "Spanish":
216
- lang='es'
217
- elif final_language == "Russian":
218
- lang='ru'
219
- elif final_language == "German":
220
- lang='de'
221
- elif final_language == "Japanese":
222
- lang='ja'
223
- # Initial directory
224
- home_dir= os.getenv('home_dir')
225
- print('Initial directory:',home_dir)
226
- # Cleaning previous files
227
- cleanup()
228
- file_obj=download_video(url)
229
- print(file_obj)
230
- # Insert Local Video File Path
231
- videoclip = VideoFileClip(file_obj)
232
- is_traduc=False
233
- # Trying to get transcripts
234
-
235
- text, trans, is_traduc = get_transcript(url,desired_language=lang)
236
- print("Transcript Found")
237
-
238
- if not is_traduc:
239
- print("No Transcript Found")
240
- # Trying to recognize audio
241
- # Insert Local Audio File Path
242
- videoclip.audio.write_audiofile("audio.wav",codec='pcm_s16le')
243
- # initialize the recognizer
244
- r = sr.Recognizer()
245
- # open the file
246
- with sr.AudioFile("audio.wav") as source:
247
- # listen for the data (load audio to memory)
248
- audio_data = r.record(source)
249
- # recognize (convert from speech to text)
250
- print("Recognize from ",lang_in)
251
- #There is a limit of 10 MB on all single requests sent to the API using local file
252
- size_wav=getSize("audio.wav")
253
- if size_wav > 50000000:
254
- print("The wav is too large")
255
- audio_chunks=split_audio_wav("audio.wav")
256
- text=""
257
- for chunk in audio_chunks:
258
- print("Converting audio to text",chunk)
259
- try:
260
- text_chunk= r.recognize_google(audio_data, language = lang_in)
261
- except Exception:
262
- print("This video cannot be recognized")
263
- cleanup()
264
- return "./demo/tryagain.mp4"
265
- text=text+text_chunk+" "
266
- text=str(text)
267
- print(type(text))
268
-
269
- else:
270
- try:
271
- text = r.recognize_google(audio_data, language = lang_in)
272
- except Exception:
273
- print("This video cannot be recognized")
274
- cleanup()
275
- return "./demo/tryagain.mp4"
276
-
277
- #print(text)
278
- print("Destination language ",lang)
279
-
280
- # init the Google API translator
281
- translator = Translator()
282
-
283
-
284
- try:
285
- translation = translator.translate(text, dest=lang)
286
- except Exception:
287
- print("This text cannot be translated")
288
- cleanup()
289
- return "./demo/tryagain.mp4"
290
-
291
- #translation.text
292
- trans=translation.text
293
-
294
- myobj = gTTS(text=trans, lang=lang, slow=False)
295
- myobj.save("audio.wav")
296
- # loading audio file
297
- audioclip = AudioFileClip("audio.wav")
298
-
299
- # adding audio to the video clip
300
- new_audioclip = CompositeAudioClip([audioclip])
301
- videoclip.audio = new_audioclip
302
- new_video="video_translated_"+lang+".mp4"
303
-
304
- # Return back to main directory
305
- os.chdir(home_dir)
306
- print('Final directory',os.getcwd())
307
-
308
- videoclip.write_videofile(new_video)
309
-
310
- videoclip.close()
311
- del file_obj
312
-
313
- return new_video
314
-
315
- initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German"])
316
- final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese"])
317
- url =gr.inputs.Textbox(label = "Enter the YouTube URL below:")
318
-
319
-
320
- gr.Interface(fn = video_to_translate,
321
- inputs = [url,initial_language,final_language],
322
- outputs = 'video',
323
- verbose = True,
324
- title = 'Video Youtube Translator',
325
- description = 'A simple application that translates Youtube small videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, English and Japanese. Wait one minute to process.',
326
- article =
327
- '''<div>
328
- <p style="text-align: center"> All you need to do is to paste the Youtube link and hit submit,, then wait for compiling. After that click on Play/Pause for listing to the video. The video is saved in an mp4 format.
329
- The lenght video limit is 10 minutes. For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
330
- </p>
331
- </div>''',
332
-
333
- examples = [
334
- ["https://www.youtube.com/watch?v=uLVRZE8OAI4", "English","Spanish"],
335
- ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
336
- ["https://www.youtube.com/watch?v=6Q6hFtitthQ", "Italian","English"],
337
- ["https://www.youtube.com/watch?v=s5XvjAC7ai8", "Russian","English"],
338
- ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
339
- ["https://www.youtube.com/watch?v=nOGZvu6tJFE", "German","Spanish"]
340
-
341
- ]
 
 
 
 
 
342
  ).launch()
 
1
+ # coding=utf8
2
+ # Youtube Video Translator
3
+ # Developed by Ruslan Magana Vsevolodovna
4
+ # https://ruslanmv.com/
5
+
6
+ # importing all necessary libraries
7
+ import pathlib
8
+ import sys, os
9
+ from gtts import gTTS
10
+ import gradio as gr
11
+ import os
12
+ import speech_recognition as sr
13
+ from googletrans import Translator, constants
14
+ from pprint import pprint
15
+ from moviepy.editor import *
16
+ from pytube import YouTube
17
+ from youtube_transcript_api import YouTubeTranscriptApi
18
+ from utils import *
19
+
20
def download_video(url):
    """Download the first progressive MP4 stream of a YouTube video.

    Args:
        url: Address of the YouTube video.

    Returns:
        The local file path returned by the stream's ``download()`` call.

    Raises:
        ValueError: If the video exposes no progressive MP4 stream.
    """
    print("Downloading...")
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.
    if stream is None:
        raise ValueError("No progressive mp4 stream is available for " + str(url))
    local_file = stream.download()
    print("Downloaded")
    return local_file
30
+
31
def validate_youtube(url):
    """Tell whether a YouTube URL must be rejected.

    Args:
        url: Address of the YouTube video.

    Returns:
        True when the video cannot be loaded or is longer than 600 seconds,
        False when it can be processed.
    """
    try:
        # Reading ``length`` is kept inside the try block so that a failure
        # while loading the video metadata is reported as an invalid URL
        # instead of escaping to the caller.
        video_length = YouTube(url).length
    except Exception:
        print("Hi there URL seems invalid")
        return True
    if video_length > 600:
        print("Your video is larger than 10 minutes")
        return True
    print("Your video is less than 10 minutes")
    return False
46
+
47
def validate_url(url):
    """Return True when *url* fails the ``validators.url`` check, else False."""
    import validators

    looks_valid = bool(validators.url(url))
    if looks_valid:
        return False
    print("Hi there URL seems invalid ")
    return True
54
+
55
+
56
def cleanup():
    """Delete every ``*.mp4`` and ``*.wav`` entry in the current directory.

    Each entry is removed independently: one that cannot be deleted is
    reported and skipped, so it no longer prevents the remaining files from
    being removed.
    """
    import pathlib
    import glob

    patterns = ('*.mp4', '*.wav')  # the tuple of file types
    # Collect the matches first, then delete them.
    junks = []
    for pattern in patterns:
        junks.extend(glob.glob(pattern))
    for junk in junks:
        print("Deleting", junk)
        try:
            pathlib.Path(junk).unlink()
        except OSError:
            print("I cannot delete the file because it is being used by another process")
74
+
75
def getSize(filename):
    """Return the size of *filename* in bytes."""
    return os.path.getsize(filename)
78
+
79
+
80
def clean_transcript(transcript_list):
    """Join the ``"text"`` of every transcript entry into one string.

    Entries whose text is exactly one of the known music markers are left
    out; every kept text is followed by a single space.
    """
    music_markers = (
        '[music]', '[Music]',
        '[музыка]', '[Музыка]',
        '[musik]', '[Musik]',
        '[musica]', '[Musica]',
        '[música]', '[Música]',
        '[音楽]', '[音乐]',
    )
    spoken_lines = (entry["text"] for entry in transcript_list)
    return "".join(line + " " for line in spoken_lines if line not in music_markers)
99
+
100
+
101
def _extract_video_id(url):
    """Return the YouTube video id found in *url*, or None when there is none."""
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str):
        return None
    try:
        parsed = urlparse(url.strip())
    except ValueError:
        parsed = None
    if parsed is not None:
        # Standard watch URL: .../watch?v=<id>&t=10s -> only the "v" value.
        from_query = parse_qs(parsed.query).get("v")
        if from_query:
            return from_query[0]
        segments = [segment for segment in parsed.path.split("/") if segment]
        # Short links: https://youtu.be/<id>
        if parsed.netloc.lower().endswith("youtu.be") and segments:
            return segments[0]
        # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]
    # Previous behaviour, kept as a last resort: everything after the first "=".
    if "=" in url:
        return url[url.index("=") + 1:] or None
    return None


def get_transcript(url, desired_language):
    """Fetch the transcript of a YouTube video and, if offered, its translation.

    A manually created translatable transcript is preferred, then an
    automatically generated translatable one, then the first transcript
    listed for the video.

    Args:
        url: Address of the YouTube video.
        desired_language: Language code the transcript should be translated to.

    Returns:
        A tuple ``(script, script_translated, is_translated)``. ``script`` is
        the cleaned text of the selected transcript, ``script_translated`` the
        cleaned translated text (``""`` when no translation was produced) and
        ``is_translated`` tells whether a translation was produced. When the
        video id cannot be determined or no transcript can be listed the
        result is ``(" ", " ", False)``.
    """
    video_id = _extract_video_id(url)
    if video_id is None:
        print('TranscriptsDisabled:')
        return " ", " ", False
    try:
        # retrieve the available transcripts
        transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))
    except Exception:
        print('TranscriptsDisabled:')
        return " ", " ", False

    print([(t.language_code, t.is_generated, t.is_translatable) for t in transcripts])
    print("There are {} avialable scripts".format(len(transcripts)))
    if not transcripts:
        return " ", " ", False

    # Work with the transcript objects themselves so that the language, the
    # translatable flag and the translation languages always belong to the
    # transcript that is actually used.
    manual = next(
        (t for t in transcripts if not t.is_generated and t.is_translatable), None
    )
    automatic = next(
        (t for t in transcripts if t.is_generated and t.is_translatable), None
    )
    if manual is not None:
        print("Script found manually generated")
        print('We extract manually created transcripts')
        transcript = manual
    elif automatic is not None:
        print("Script found automatic generated")
        print('We extract generated transcript')
        transcript = automatic
    else:
        print('We try find the transcript')
        transcript = transcripts[0]

    is_translated = False
    script_translated = ""
    if transcript.is_translatable:
        offered = [
            language['language_code'] for language in transcript.translation_languages
        ]
        if desired_language in offered:
            print("It was found the translation for lang:", desired_language)
            print('We translate directly the transcript')
            try:
                translated_entries = transcript.translate(desired_language).fetch()
            except Exception as err:
                # Leave is_translated False so the caller can use its fallback.
                print("The translated transcript could not be fetched:", err)
            else:
                script_translated = clean_transcript(translated_entries)
                is_translated = True

    try:
        script = clean_transcript(transcript.fetch())
    except Exception as err:
        print("The transcript could not be fetched:", err)
        script = " "

    return script, script_translated, is_translated
178
+
179
# Working directories: the start-up directory and a "temp" folder inside it.
home_dir = os.getcwd()
temp_dir = os.path.join(home_dir, "temp")
# Make sure the temp folder exists (no error when it is already there).
os.makedirs(temp_dir, exist_ok=True)
# Publish both locations through the process environment.
os.environ.update(home_dir=home_dir, temp_dir=temp_dir)
186
+
187
# Locale handed to the speech recogniser for every supported source language.
_RECOGNITION_LOCALES = {
    "English": 'en-US',
    "Italian": 'it-IT',
    "Chinese": 'zh-CN',
    "Spanish": 'es-MX',
    "Russian": 'ru-RU',
    "German": 'de-DE',
    "Japanese": 'ja-JP',
}

# Language code used for transcript translation, text translation and speech.
_TARGET_LANGUAGE_CODES = {
    "English": 'en',
    "Italian": 'it',
    "Spanish": 'es',
    "Russian": 'ru',
    "German": 'de',
    "Vietnamese": 'vi',
    "Japanese": 'ja',
}

# Extracted audio larger than this many bytes is recognised chunk by chunk.
# NOTE(review): the original comment mentioned a 10 MB request limit while the
# code compared against 50000000 bytes; the code's value is kept - confirm.
_MAX_SINGLE_REQUEST_BYTES = 50000000


def _load_audio(recognizer, chunk):
    """Return recogniser-ready audio for *chunk* (AudioData or an audio file)."""
    # NOTE(review): assumes split_audio_wav() yields either file paths or
    # sr.AudioData objects - confirm against utils.split_audio_wav.
    if isinstance(chunk, sr.AudioData):
        return chunk
    with sr.AudioFile(chunk) as source:
        # load the audio into memory
        return recognizer.record(source)


def _speech_to_text(videoclip, lang_in):
    """Recognise the speech of *videoclip*; return the text or None on failure."""
    if lang_in is None or videoclip.audio is None:
        print("This video cannot be recognized")
        return None
    try:
        videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
        recognizer = sr.Recognizer()
        print("Recognize from ", lang_in)
        if getSize("audio.wav") > _MAX_SINGLE_REQUEST_BYTES:
            print("The wav is too large")
            pieces = []
            for chunk in split_audio_wav("audio.wav"):
                print("Converting audio to text", chunk)
                # Recognise the chunk itself, not the complete recording.
                chunk_audio = _load_audio(recognizer, chunk)
                pieces.append(
                    recognizer.recognize_google(chunk_audio, language=lang_in)
                )
            return "".join(piece + " " for piece in pieces)
        whole_audio = _load_audio(recognizer, "audio.wav")
        return recognizer.recognize_google(whole_audio, language=lang_in)
    except Exception:
        print("This video cannot be recognized")
        return None


def _translated_narration(url, videoclip, lang_in, lang):
    """Return the text to be spoken in language *lang*, or None on failure."""
    try:
        _, narration, is_traduc = get_transcript(url, desired_language=lang)
    except Exception as err:
        print("Transcript lookup failed:", err)
        narration, is_traduc = "", False
    if is_traduc:
        print("Transcript Found")
        return narration

    print("No Transcript Found")
    # No translated transcript: recognise the audio and translate the text.
    text = _speech_to_text(videoclip, lang_in)
    if text is None:
        return None
    print("Destination language ", lang)
    translator = Translator()
    try:
        translation = translator.translate(text, dest=lang)
    except Exception:
        print("This text cannot be translated")
        return None
    return translation.text


def _dub_video(videoclip, narration, lang, home_dir):
    """Replace the clip's audio with spoken *narration*; return the new file name."""
    try:
        # NOTE(review): gTTS writes MP3-encoded data; the "audio.wav" file
        # name is kept unchanged from the original code.
        gTTS(text=narration, lang=lang, slow=False).save("audio.wav")
    except Exception as err:
        print("The translated text cannot be converted to speech:", err)
        return None
    audioclip = AudioFileClip("audio.wav")
    try:
        videoclip.audio = CompositeAudioClip([audioclip])
        new_video = "video_translated_" + lang + ".mp4"
        # Return back to main directory
        if home_dir:
            os.chdir(home_dir)
        print('Final directory', os.getcwd())
        videoclip.write_videofile(new_video)
    finally:
        audioclip.close()
    return new_video


def video_to_translate(url, initial_language, final_language):
    """Produce a copy of a YouTube video dubbed in another language.

    The narration comes from a translated transcript when one is available,
    otherwise from speech recognition followed by text translation. It is
    converted to speech and set as the audio track of the downloaded video.

    Args:
        url: Address of the YouTube video.
        initial_language: Name of the spoken language, e.g. ``"English"``.
        final_language: Name of the language to dub into, e.g. ``"Spanish"``.

    Returns:
        The file name of the dubbed video, ``"./demo/tryagain2.mp4"`` when the
        URL is rejected by ``validate_youtube``, or ``"./demo/tryagain.mp4"``
        when the video cannot be processed.
    """
    print('Checking the url')
    if validate_youtube(url):
        return "./demo/tryagain2.mp4"

    # Internal definitions
    lang_in = _RECOGNITION_LOCALES.get(initial_language)
    lang = _TARGET_LANGUAGE_CODES.get(final_language)
    if lang is None:
        print("Unsupported destination language:", final_language)
        return "./demo/tryagain.mp4"

    # Initial directory
    home_dir = os.getenv('home_dir')
    print('Initial directory:', home_dir)
    # Cleaning previous files
    cleanup()
    try:
        file_obj = download_video(url)
    except Exception as err:
        print("The video could not be downloaded:", err)
        return "./demo/tryagain.mp4"
    print(file_obj)

    videoclip = VideoFileClip(file_obj)
    try:
        new_video = None
        narration = _translated_narration(url, videoclip, lang_in, lang)
        if narration is not None:
            new_video = _dub_video(videoclip, narration, lang, home_dir)
    finally:
        # Always release the clip, also on failure, so that the downloaded
        # file is no longer held open when cleanup() runs.
        videoclip.close()

    if new_video is None:
        cleanup()
        return "./demo/tryagain.mp4"
    return new_video
319
+
320
# Input widgets. "Chinese" (source) and "Vietnamese" (target) are offered
# because video_to_translate maps both of them; they are appended last so the
# first entry of each list stays the same.
initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German","Chinese"])
final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese","Vietnamese"])
url =gr.inputs.Textbox(label = "Enter the YouTube URL below:")


# Build the web interface around video_to_translate and start serving it.
gr.Interface(fn = video_to_translate,
             inputs = [url,initial_language,final_language],
             outputs = 'video',
             verbose = True,
             title = 'Video Youtube Translator',
             description = 'A simple application that translates Youtube small videos from English, Italian, Japanese, Russian, Spanish, German and Chinese to Italian, Spanish, Russian, German, English, Japanese and Vietnamese. Wait one minute to process.',
             article =
             '''<div>
<p style="text-align: center"> All you need to do is to paste the Youtube link and hit submit,, then wait for compiling. After that click on Play/Pause for listing to the video. The video is saved in an mp4 format.
The lenght video limit is 10 minutes. For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
</p>
</div>''',

             examples = [
                 ["https://www.youtube.com/watch?v=uLVRZE8OAI4", "English","Spanish"],
                 ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
                 ["https://www.youtube.com/watch?v=6Q6hFtitthQ", "Italian","English"],
                 ["https://www.youtube.com/watch?v=s5XvjAC7ai8", "Russian","English"],
                 ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
                 ["https://www.youtube.com/watch?v=nOGZvu6tJFE", "German","Spanish"]

             ]
             ).launch()