Alex Volkov committed on
Commit
749c554
·
1 Parent(s): 7ce50df

committed the good bits, let's see

Browse files
Files changed (6) hide show
  1. app.py +20 -5
  2. download.py +5 -5
  3. fonts/Lato-Black.ttf +0 -0
  4. fonts/lato.ttf +0 -0
  5. utils/apis.py +1 -1
  6. utils/subs.py +11 -4
app.py CHANGED
@@ -22,7 +22,8 @@ preload_model: str = args.get("preload")
22
  url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", value='https://twitter.com/starsonxh/status/1552945347194142720', lines=1, elem_id="url_input")
23
  # download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
24
  download_status = gr.Checkbox(label="Status:", elem_id="download_status", interactive=False)
25
- init_video = gr.Video(label="Downloaded video", visible=False)
 
26
  init_audio = gr.Audio(label="Downloaded audio", visible=False)
27
  output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10, interactive=True)
28
  sub_video = gr.Video(label="Subbed video", visible=False, mirror_webcam=False)
@@ -71,9 +72,23 @@ css = """
71
  #input_row{
72
  position: relative;
73
  }
74
- .gradio-interface #submit{
75
-
76
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  """
78
  with gr.Blocks(css=css+"") as demo:
79
  gr.Markdown('# Vid Translator 0.1 - get english subtitles for videos in any language')
@@ -82,9 +97,9 @@ with gr.Blocks(css=css+"") as demo:
82
  with gr.Row(elem_id="input_row"):
83
  with gr.Group() as group:
84
  url_input.render()
85
- greet_btn = gr.Button("Translate", elem_id='submit', variant='primary')
86
-
87
  download_status.render()
 
88
  with gr.Row():
89
  with gr.Column():
90
 
 
22
  url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", value='https://twitter.com/starsonxh/status/1552945347194142720', lines=1, elem_id="url_input")
23
  # download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
24
  download_status = gr.Checkbox(label="Status:", elem_id="download_status", interactive=False)
25
+ translate_action = gr.Checkbox(label="Auto translate to english", elem_id='translate_toggle', interactive=True, value=True)
26
+ init_video = gr.Video(label="Upload video manually", visible=True, interactive=True)
27
  init_audio = gr.Audio(label="Downloaded audio", visible=False)
28
  output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10, interactive=True)
29
  sub_video = gr.Video(label="Subbed video", visible=False, mirror_webcam=False)
 
72
  #input_row{
73
  position: relative;
74
  }
75
+ #url_input_group .gr-form:nth-child(2){
76
+ position:relative
77
  }
78
+ #translate_toggle{
79
+ position: absolute;
80
+ right: 0;
81
+ width: auto;
82
+ flex: none;
83
+ }
84
+
85
+ #fake_ass_group{
86
+ display:none;
87
+ visibility: hidden;
88
+ position:absolute;
89
+ pointer-events: none;
90
+ }
91
+
92
  """
93
  with gr.Blocks(css=css+"") as demo:
94
  gr.Markdown('# Vid Translator 0.1 - get english subtitles for videos in any language')
 
97
  with gr.Row(elem_id="input_row"):
98
  with gr.Group() as group:
99
  url_input.render()
100
+ action_btn = gr.Button(elem_id='submit', variant='primary', value="Translate")
 
101
  download_status.render()
102
+ translate_action.render()
103
  with gr.Row():
104
  with gr.Column():
105
 
download.py CHANGED
@@ -27,15 +27,15 @@ if preload_model:
27
  print("Preloading model")
28
  model = whisper.load_model(model_size)
29
 
30
- def download_generator(url):
31
 
32
  ### Step 1 : check if video is available
33
  yield {"message": f"Checking {url} for videos"}
34
  try:
35
  meta = check_download(url)
36
  print(json.dumps(meta, indent=2))
37
- if(meta['duration'] > 5 * 60):
38
- raise Exception("Video is too long, please use videos less than 5 minutes")
39
  yield {"message": f"Found video with {meta['duration']} seconds duration from {meta['extractor']}", "meta": meta}
40
 
41
  tempdir = output_dir/f"{meta['id']}"
@@ -150,12 +150,12 @@ def check_download(url):
150
  else:
151
  return meta
152
 
153
- def transcribe(audio):
154
  print('Starting transcribe...')
155
  global model
156
  if not preload_model:
157
  model = whisper.load_model(model_size)
158
- output = model.transcribe(audio, task="translate")
159
  output["language"] = LANGUAGES[output["language"]]
160
  output['segments'] = [{"id": 0, "seek": 0, "start": 0.0, "end": 3, "text": " [AI translation by @vidtranslator]"}] + output['segments']
161
  print(f'Finished transcribe from {output["language"]}', output["text"])
 
27
  print("Preloading model")
28
  model = whisper.load_model(model_size)
29
 
30
+ def download_generator(url, translate_action=True):
31
 
32
  ### Step 1 : check if video is available
33
  yield {"message": f"Checking {url} for videos"}
34
  try:
35
  meta = check_download(url)
36
  print(json.dumps(meta, indent=2))
37
+ if(meta['duration'] > 159):
38
+ raise Exception("Video is too long, please use videos less than 159 seconds")
39
  yield {"message": f"Found video with {meta['duration']} seconds duration from {meta['extractor']}", "meta": meta}
40
 
41
  tempdir = output_dir/f"{meta['id']}"
 
150
  else:
151
  return meta
152
 
153
+ def transcribe(audio, translate_action=True):
154
  print('Starting transcribe...')
155
  global model
156
  if not preload_model:
157
  model = whisper.load_model(model_size)
158
+ output = model.transcribe(audio, task="translate" if translate_action else "transcribe" )
159
  output["language"] = LANGUAGES[output["language"]]
160
  output['segments'] = [{"id": 0, "seek": 0, "start": 0.0, "end": 3, "text": " [AI translation by @vidtranslator]"}] + output['segments']
161
  print(f'Finished transcribe from {output["language"]}', output["text"])
fonts/Lato-Black.ttf ADDED
Binary file (69.5 kB). View file
 
fonts/lato.ttf ADDED
Binary file (69.5 kB). View file
 
utils/apis.py CHANGED
@@ -63,7 +63,7 @@ def test_api(url=''):
63
  return f"I've slept for 15 seconds and now I'm done. "
64
 
65
  def render_api_elements(url_input, download_status, output_text, sub_video):
66
- with gr.Group() as api_buttons:
67
  # This is a hack to get APIs registered with the blocks interface
68
  translate_result = gr.Textbox(visible=False)
69
  translate_language = gr.Textbox(visible=False)
 
63
  return f"I've slept for 15 seconds and now I'm done. "
64
 
65
  def render_api_elements(url_input, download_status, output_text, sub_video):
66
+ with gr.Group(elem_id='fake_ass_group') as api_buttons:
67
  # This is a hack to get APIs registered with the blocks interface
68
  translate_result = gr.Textbox(visible=False)
69
  translate_language = gr.Textbox(visible=False)
utils/subs.py CHANGED
@@ -10,18 +10,25 @@ from typing import Iterator, TextIO
10
  def bake_subs(input_file, output_file, subs_file, fontsdir):
11
  print(f"Baking {subs_file} into video... {input_file} -> {output_file}")
12
 
13
- fontfile = fontsdir / 'arial.ttf'
14
- watermarkfile = fontsdir / 'watermarksmol.png'
15
- fontstyle = 'Fontsize=18,OutlineColour=&H40000000,BorderStyle=3,FontName=Arial'
16
  video = ffmpeg.input(input_file)
17
  watermark = ffmpeg.input(watermarkfile)
18
  audio = video.audio
 
 
 
 
 
 
 
19
  (
20
  ffmpeg.concat(
21
  video.filter('subtitles', subs_file, fontsdir=fontfile, force_style=fontstyle),
22
  audio, v=1, a=1
23
  )
24
- .overlay(watermark, x='10', y='10')
25
  .output(filename=output_file)
26
  .run(quiet=True, overwrite_output=True)
27
  )
 
10
  def bake_subs(input_file, output_file, subs_file, fontsdir):
11
  print(f"Baking {subs_file} into video... {input_file} -> {output_file}")
12
 
13
+ fontfile = fontsdir / 'Lato-Black.ttf'
14
+ watermarkfile = fontsdir / 'watermark.png'
15
+
16
  video = ffmpeg.input(input_file)
17
  watermark = ffmpeg.input(watermarkfile)
18
  audio = video.audio
19
+ probe = ffmpeg.probe(input_file)
20
+ video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
21
+ iw = int(video_stream['width'])
22
+ ih = int(video_stream['height'])
23
+ print(f"width {iw} and height {ih}")
24
+ sub_size = 18 if iw > ih else 8
25
+ fontstyle = f'Fontsize={sub_size},OutlineColour=&H40000000,BorderStyle=3,FontName=Lato,Bold=1'
26
  (
27
  ffmpeg.concat(
28
  video.filter('subtitles', subs_file, fontsdir=fontfile, force_style=fontstyle),
29
  audio, v=1, a=1
30
  )
31
+ .overlay(watermark.filter('scale', iw / 3, -1), x='10', y='10')
32
  .output(filename=output_file)
33
  .run(quiet=True, overwrite_output=True)
34
  )