Alex Volkov committed on
Commit
cb23f19
·
1 Parent(s): fb5c5d4

language change

Browse files

Signed-off-by: Alex Volkov <alex.volkov@fundbox.com>

Files changed (7) hide show
  1. .DS_Store +0 -0
  2. .vscode/settings.json +5 -0
  3. app.py +76 -61
  4. download.py +47 -25
  5. environment.yml +8 -0
  6. fonts/.DS_Store +0 -0
  7. static/css/main.css +97 -0
.DS_Store ADDED
Binary file (8.2 kB). View file
 
.vscode/settings.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "files.associations": {
3
+ "*.yaml": "home-assistant"
4
+ }
5
+ }
app.py CHANGED
@@ -5,6 +5,7 @@ from download import download_generator
5
  import anvil.media
6
  import os
7
  import dotenv
 
8
 
9
  from utils.apis import render_api_elements
10
  from utils.utils import get_args
@@ -19,108 +20,122 @@ gradio_share: bool = args.get("public")
19
  model_size: str = args.get("model")
20
  preload_model: str = args.get("preload")
21
 
 
 
 
 
 
22
  url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", value='https://twitter.com/starsonxh/status/1552945347194142720', lines=1, elem_id="url_input")
23
  # download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
24
- download_status = gr.Checkbox(label="Status:", elem_id="download_status", interactive=False)
25
  translate_action = gr.Checkbox(label="Auto translate to english", elem_id='translate_toggle', interactive=True, value=True)
26
  init_video = gr.Video(label="Upload video manually", visible=True, interactive=True, mirror_webcam=False)
27
  init_audio = gr.Audio(label="Downloaded audio", visible=False)
28
- output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10, interactive=True)
29
  sub_video = gr.Video(label="Subbed video", visible=False, mirror_webcam=False)
30
 
31
 
32
-
33
- def predownload(url, translate_action):
34
- for response in download_generator(url, translate_action):
35
  updates_object = {}
36
- updates_object[download_status] = gr.update(label=f"STATUS: {response.get('message')}")
37
  meta = response.get('meta')
 
38
  if 'video' in response:
39
  updates_object[init_video] = gr.update(visible=True, value=response["video"],
40
  label=f"Init Video: {meta['id']}.{meta['ext']}")
41
  updates_object[init_audio] = gr.update(visible=True, value=response["audio"],
42
  label=f"Extracted audio : {meta['id']}.mp3")
 
 
43
  if 'whisper_result' in response:
44
  updates_object[output_text] = gr.update(value=response['whisper_result'].get('srt'), visible=True,
45
- label=f"Subtitles translated from {response['whisper_result'].get('language')} (detected language)")
 
 
 
46
  if 'sub_video' in response:
47
  updates_object[sub_video] = gr.update(visible=True, value=response["sub_video"],
48
  label=f"Subbed video: {meta['id']}_translated.mp4")
 
49
 
 
50
  yield updates_object
51
 
52
- def rebake(output_text, sub_video):
53
- raise NotImplementedError("Rebake not implemented yet")
 
 
 
 
 
 
54
 
55
 
56
  subtitled_video = False
57
- css = """
58
- #submit{
59
- position: absolute;
60
- flex:0 !important;
61
- width: 120px;
62
- right: 13px;
63
- top: 40px;
64
- }
65
- #url_input{
66
- font-size: 40px !important;
67
- }
68
- #download_status{
69
- font-size: 40px !important;
70
- }
71
-
72
- #input_row{
73
- position: relative;
74
- }
75
- #url_input_group .gr-form:nth-child(2){
76
- position:relative
77
- }
78
- #translate_toggle{
79
- position: absolute;
80
- right: 0;
81
- width: auto;
82
- flex: none;
83
- }
84
-
85
- #fake_ass_group{
86
- display:none;
87
- visibility: hidden;
88
- position:absolute;
89
- pointer-events: none;
90
- }
91
-
92
- """
93
- with gr.Blocks(css=css+"") as demo:
94
- gr.Markdown('# Vid Translator 0.1 - get english subtitles for videos in any language')
95
- gr.Markdown('### Link to a tweet, youtube or other video and get a translated video with @openAi #whisper, built by [@altryne](https://twitter.com/altryne/)')
96
- gr.Markdown('### This is used as the backend for [@vidtranslator](https://twitter.com/vidtranslator/)')
97
  with gr.Row(elem_id="input_row"):
98
  with gr.Group() as group:
99
  url_input.render()
100
  action_btn = gr.Button(elem_id='submit', variant='primary', value="Translate")
101
- download_status.render()
102
- translate_action.render()
 
 
 
 
 
 
 
103
  with gr.Row():
104
  with gr.Column():
105
-
106
  init_video.render()
107
  init_audio.render()
 
 
 
108
  with gr.Column():
109
- with gr.Group():
110
- sub_video.render()
111
- output_text.render()
112
- gr.Button("Download srt file")
113
- rebake = gr.Button("Edit subtitles on video")
114
-
115
- outputs = [download_status, init_video, init_audio, output_text, sub_video]
116
- inputs = [url_input, translate_action]
 
117
  action_btn.click(fn=predownload, inputs=inputs, outputs=outputs, api_name='predownload')
118
  url_input.submit(fn=predownload, inputs=inputs, outputs=outputs)
119
- rebake.click(fn=rebake, inputs=[output_text, sub_video], outputs=[download_status, output_text, sub_video])
 
120
 
121
  translate_action.change(fn=lambda x: {action_btn: gr.update(value=f"Translate" if x else "Transcribe")},
122
  inputs=[translate_action], outputs=[action_btn])
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  # Render imported buttons for API bindings
125
  render_api_elements(url_input,download_status, output_text, sub_video)
126
 
 
5
  import anvil.media
6
  import os
7
  import dotenv
8
+ from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
9
 
10
  from utils.apis import render_api_elements
11
  from utils.utils import get_args
 
20
  model_size: str = args.get("model")
21
  preload_model: str = args.get("preload")
22
 
23
+
24
+ LANG_CHOICES = sorted([x.capitalize() for x in LANGUAGES.values()])
25
+ LANG_CHOICES.insert(0, "Autodetect")
26
+
27
+
28
  url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", value='https://twitter.com/starsonxh/status/1552945347194142720', lines=1, elem_id="url_input")
29
  # download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
30
+ download_status = gr.Checkbox(label="", elem_id="download_status", interactive=False)
31
  translate_action = gr.Checkbox(label="Auto translate to english", elem_id='translate_toggle', interactive=True, value=True)
32
  init_video = gr.Video(label="Upload video manually", visible=True, interactive=True, mirror_webcam=False)
33
  init_audio = gr.Audio(label="Downloaded audio", visible=False)
34
+ output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10, interactive=True, elem_id="output_text")
35
  sub_video = gr.Video(label="Subbed video", visible=False, mirror_webcam=False)
36
 
37
 
38
def predownload(url, translate_action, source_language):
    """Stream Gradio updates while a video is fetched, transcribed and subbed.

    Walks the progress dicts produced by ``download_generator`` and, for each
    one, yields a ``{component: gr.update(...)}`` mapping so the UI refreshes
    step by step.

    Args:
        url: Video URL forwarded to ``download_generator``.
        translate_action: Forwarded to ``download_generator`` unchanged.
        source_language: Forwarded to ``download_generator`` unchanged.

    Yields:
        A dict keyed by Gradio components (status, videos, audio, subtitle
        text, output files) with the update to apply to each.
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view; confirm the 'srt_path' check is a sibling of 'whisper_result'.
    collected = []  # every produced file so far; shared across yields
    for step in download_generator(url, translate_action, source_language):
        meta = step.get('meta')
        ui_changes = {
            download_status: gr.update(label=f"{step.get('message')}"),
        }

        if 'video' in step:
            ui_changes[init_video] = gr.update(
                visible=True,
                value=step["video"],
                label=f"Init Video: {meta['id']}.{meta['ext']}",
            )
            ui_changes[init_audio] = gr.update(
                visible=True,
                value=step["audio"],
                label=f"Extracted audio : {meta['id']}.mp3",
            )
            collected.extend([step["video"], step["audio"]])

        if 'whisper_result' in step:
            whisper_result = step['whisper_result']
            ui_changes[output_text] = gr.update(
                value=whisper_result.get('srt'),
                visible=True,
                label=f"Subtitles transcribed from {whisper_result.get('language')} (detected language)",
            )

        if 'srt_path' in step:
            collected.append(step["srt_path"])

        if 'sub_video' in step:
            ui_changes[sub_video] = gr.update(
                visible=True,
                value=step["sub_video"],
                label=f"Subbed video: {meta['id']}_translated.mp4",
            )
            collected.append(step["sub_video"])

        # The files widget only becomes visible once there is something to list.
        ui_changes[output_file] = gr.update(
            value=collected,
            visible=bool(collected),
            label="Output Files",
        )
        yield ui_changes
65
 
66
def correct_subtitles(url, output_text):
    """Re-run the pipeline with user-edited subtitles and stream UI updates.

    Calls ``download_generator`` with ``corrected_subtitles`` set to the
    edited text and yields one ``{component: gr.update(...)}`` mapping per
    progress dict.

    Args:
        url: Video URL forwarded to ``download_generator``.
        output_text: Subtitle text as edited by the user.

    Yields:
        A dict with a status-label update and, when the step carries a
        ``'sub_video'``, an update for the subtitled-video player.
    """
    for step in download_generator(url, corrected_subtitles=output_text):
        ui_changes = {
            download_status: gr.update(label=f"STATUS: {step.get('message')}"),
        }
        if 'sub_video' in step:
            ui_changes[sub_video] = gr.update(
                visible=True,
                value=step["sub_video"],
                label="Corrected subtitles",
            )
        yield ui_changes
74
 
75
 
76
  subtitled_video = False
77
+
78
+ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', title='Vid Translator Studio') as demo:
79
+ gr.HTML('<h1 class="main-title">VidTranslator Studio 0.1</h1>')
80
+ gr.HTML("<h2 class='secondary'>Automatic social media video translation from 99 languages</h2>")
81
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  with gr.Row(elem_id="input_row"):
83
  with gr.Group() as group:
84
  url_input.render()
85
  action_btn = gr.Button(elem_id='submit', variant='primary', value="Translate")
86
+ gr.StatusTracker()
87
+ with gr.Row(elem_id="second_row"):
88
+ source_language = gr.Dropdown(choices=LANG_CHOICES,
89
+ label="Source Language",
90
+ value='Autodetect',
91
+ interactive=True, elem_id="source_language")
92
+ download_status.render()
93
+ translate_action.render()
94
+
95
  with gr.Row():
96
  with gr.Column():
 
97
  init_video.render()
98
  init_audio.render()
99
+ with gr.Row():
100
+ output_file = gr.Files(label="Output Files", visible=False)
101
+
102
  with gr.Column():
103
+ output_text.render()
104
+ correct_btn = gr.Button("Correct subtitles")
105
+
106
+ with gr.Column():
107
+ sub_video.render()
108
+
109
+
110
+ outputs = [download_status, init_video, init_audio, output_text, sub_video, output_file ]
111
+ inputs = [url_input, translate_action, source_language]
112
  action_btn.click(fn=predownload, inputs=inputs, outputs=outputs, api_name='predownload')
113
  url_input.submit(fn=predownload, inputs=inputs, outputs=outputs)
114
+
115
+ correct_btn.click(fn=correct_subtitles, inputs=[url_input, output_text], outputs=[download_status, output_text, sub_video])
116
 
117
  translate_action.change(fn=lambda x: {action_btn: gr.update(value=f"Translate" if x else "Transcribe")},
118
  inputs=[translate_action], outputs=[action_btn])
119
 
120
+ gr.HTML("""<div class='footer'>
121
+ <div class="relative">
122
+ <div class="absolute inset-0 flex items-center" aria-hidden="true">
123
+ <div class="w-full border-t border-gray-300"></div>
124
+ </div>
125
+ <div class="relative flex justify-center">
126
+ <span class="bg-white px-2 text-sm text-gray-500">Continue</span>
127
+ </div>
128
+ </div>
129
+
130
+ <a href='https://twitter.com/altryne/'><img src='https://img.shields.io/badge/%40altryne-follow-green' alt=''></a>
131
+ </div>""")
132
+
133
def init_video_manual_upload(url, init_video):
    """Debug hook for manual video uploads: print the URL and the video value."""
    # One call, newline-separated, produces the same two output lines.
    print(url, init_video, sep="\n")
136
+
137
+ init_video.change(fn=init_video_manual_upload, inputs=[url_input, init_video], outputs=[])
138
+
139
  # Render imported buttons for API bindings
140
  render_api_elements(url_input,download_status, output_text, sub_video)
141
 
download.py CHANGED
@@ -11,7 +11,7 @@ import tempfile
11
  import json
12
  import argparse
13
  import whisper
14
- from whisper.tokenizer import LANGUAGES
15
  import ffmpeg
16
  from utils.subs import bake_subs
17
  from utils.utils import get_args
@@ -27,9 +27,9 @@ if preload_model:
27
  print("Preloading model")
28
  model = whisper.load_model(model_size)
29
 
30
- def download_generator(url, translate_action=True):
31
 
32
- ### Step 1 : check if video is available
33
  yield {"message": f"Checking {url} for videos"}
34
  try:
35
  meta = check_download(url)
@@ -43,32 +43,46 @@ def download_generator(url, translate_action=True):
43
  yield {"message": f"{e}"}
44
  return
45
 
46
- ### Step 2 : Download video and extract audio
47
- yield {"message": f"Starting download with URL {url}, this may take a while"}
48
-
49
  try:
50
- meta, video, audio = download(url, tempdir)
 
 
 
 
 
 
 
51
  yield {"message": f"Downloaded video and extracted audio", "video": video, "audio": audio, "meta": meta}
 
52
  except Exception as e:
53
  os.chdir(original_dir)
54
  yield {"message": f"{e}"}
55
  raise e
56
 
57
- ### Step 3 : Transcribe with whisper
58
- yield {"message": f"[PLEASE WAIT] Starting whisper transcribe with {meta['id']}.mp3"}
59
- try:
60
- whisper_result = transcribe(audio, translate_action)
61
- srt_path = tempdir / f"{meta['id']}.srt"
62
- with open(srt_path, "w", encoding="utf-8") as srt:
63
- write_srt(whisper_result["segments"], file=srt)
64
 
65
- whisper_result["srt"] = Path(srt_path).read_text()
66
- yield {"message": f"Transcribe successful", "whisper_result": whisper_result, "meta": meta}
67
- except Exception as e:
68
- os.chdir(original_dir)
69
- yield {"message": f"{e}"}
70
- raise e
71
- #TODO: add return here so users can continue after editing subtitles
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  ### Step 4 : Bake subtitles into video with ffmpeg
74
  yield {"message": f"[PLEASE WAIT] baking subtitles into video"}
@@ -150,14 +164,22 @@ def check_download(url):
150
  else:
151
  return meta
152
 
153
- def transcribe(audio, translate_action=True):
154
- print(f'Starting ' + "translate" if translate_action else "transcribe")
 
155
  global model
156
  if not preload_model:
157
  model = whisper.load_model(model_size)
158
- output = model.transcribe(audio, task="translate" if translate_action else "transcribe")
 
 
 
 
 
 
 
159
  output["language"] = LANGUAGES[output["language"]]
160
- output['segments'] = [{"id": 0, "seek": 0, "start": 0.0, "end": 3, "text": " [AI translation by @vidtranslator]"}] + output['segments']
161
  print(f'Finished transcribe from {output["language"]}', output["text"])
162
  return output
163
 
 
11
  import json
12
  import argparse
13
  import whisper
14
+ from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
15
  import ffmpeg
16
  from utils.subs import bake_subs
17
  from utils.utils import get_args
 
27
  print("Preloading model")
28
  model = whisper.load_model(model_size)
29
 
30
+ def download_generator(url, translate_action=True, source_language='Autodetect', corrected_subtitles=None):
31
 
32
+ # Step 1 : check if video is available
33
  yield {"message": f"Checking {url} for videos"}
34
  try:
35
  meta = check_download(url)
 
43
  yield {"message": f"{e}"}
44
  return
45
 
46
+ # Step 2 : Download video and extract audio
 
 
47
  try:
48
+ # check if we already have the folder and the main files
49
+ if(tempdir.is_dir() and (tempdir/f"{meta['id']}.{meta['ext']}").is_file() and (tempdir/f"{meta['id']}.mp3").is_file()):
50
+ yield {"message": f"Using cached files"}
51
+ video = str((tempdir/f"{meta['id']}.{meta['ext']}").resolve())
52
+ audio = str((tempdir/f"{meta['id']}.mp3").resolve())
53
+ else:
54
+ yield {"message": f"Starting download with URL {url}, this may take a while"}
55
+ meta, video, audio = download(url, tempdir)
56
  yield {"message": f"Downloaded video and extracted audio", "video": video, "audio": audio, "meta": meta}
57
+
58
  except Exception as e:
59
  os.chdir(original_dir)
60
  yield {"message": f"{e}"}
61
  raise e
62
 
63
+ srt_path = tempdir / f"{meta['id']}.srt"
 
 
 
 
 
 
64
 
65
+ if not corrected_subtitles:
66
+ ### Step 3 : Transcribe with whisper
67
+ yield {"message": f"[PLEASE WAIT] Starting whisper transcribe with {meta['id']}.mp3"}
68
+ try:
69
+ whisper_result = transcribe(audio, translate_action, source_language)
70
+
71
+ with open(srt_path, "w", encoding="utf-8") as srt:
72
+ write_srt(whisper_result["segments"], file=srt)
73
+
74
+ whisper_result["srt"] = Path(srt_path).read_text()
75
+ yield {"message": f"Transcribe successful", "whisper_result": whisper_result, "meta": meta, "srt_path": srt_path}
76
+ except Exception as e:
77
+ os.chdir(original_dir)
78
+ yield {"message": f"{e}"}
79
+ raise e
80
+ else:
81
+ ### step 3.5 : use corrected subtitles
82
+ yield {"message": f"Using corrected subtitles"}
83
+ with open(srt_path, "w", encoding="utf-8") as srt:
84
+ srt.write(corrected_subtitles)
85
+ yield {"message": f"Transcribe successful", "srt_path": srt_path, "meta": meta}
86
 
87
  ### Step 4 : Bake subtitles into video with ffmpeg
88
  yield {"message": f"[PLEASE WAIT] baking subtitles into video"}
 
164
  else:
165
  return meta
166
 
167
def transcribe(audio, translate_action=True, language='Autodetect'):
    """Run Whisper on an audio file and return its result dict.

    Args:
        audio: Audio input handed straight to ``model.transcribe``.
        translate_action: When true the Whisper task is ``"translate"``,
            otherwise ``"transcribe"``.
        language: Name of the spoken language, looked up case-insensitively
            in ``TO_LANGUAGE_CODE``. ``'Autodetect'`` (or an empty value)
            leaves the language unset so Whisper detects it.

    Returns:
        The dict returned by ``model.transcribe``, with ``"language"``
        replaced by its ``LANGUAGES`` entry and a 3-second
        ``[AI transcription]`` segment prepended to ``"segments"``.

    Raises:
        KeyError: If ``language`` is not a key of ``TO_LANGUAGE_CODE``.
    """
    task = "translate" if translate_action else "transcribe"
    print(f'Starting {task} with whisper size {model_size}')
    global model
    if not preload_model:
        # Lazy path: the model is loaded on each call when it was not preloaded.
        model = whisper.load_model(model_size)
    props = {
        "task": task,
    }
    if language and language != 'Autodetect':
        props["language"] = TO_LANGUAGE_CODE[language.lower()]

    # Forward the assembled options. Previously only ``task`` was passed, so
    # an explicitly selected source language was silently ignored.
    output = model.transcribe(audio, **props)

    output["language"] = LANGUAGES[output["language"]]
    output['segments'] = [{"id": 0, "seek": 0, "start": 0.0, "end": 3, "text": " [AI transcription]"}] + output['segments']
    print(f'Finished transcribe from {output["language"]}', output["text"])
    return output
185
 
environment.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ name: vidnightly
2
+ channels:
3
+ - defaults
4
+ dependencies:
5
+ - pytorch
6
+ - torchvision
7
+ - torchaudio
8
+ prefix: /opt/homebrew/Caskroom/miniconda/base/envs/vidnightly
fonts/.DS_Store ADDED
Binary file (6.15 kB). View file
 
static/css/main.css ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@700&display=swap');
2
+
3
+ #download_status input[type=checkbox] {
4
+ display: none;
5
+ }
6
+ #download_status label{
7
+ height: 100%;
8
+ }
9
+ .main-title{
10
+ color: #FF7A7A;
11
+ font-family: 'Poppins', sans-serif;
12
+ font-size: 4.5em;
13
+ line-height: 1.25;
14
+ font-weight: 700;
15
+ background-image: linear-gradient(45deg, #695EE6 0%, #FF7A7A 85%);
16
+ -webkit-background-clip: text;
17
+ -webkit-text-fill-color: transparent;
18
+ }
19
+ .secondary{
20
+ color: #FF7A7A;
21
+ font-family: 'Poppins', sans-serif;
22
+ font-size: 2em;
23
+ line-height: 1.25;
24
+ font-weight: 700;
25
+ background-image: linear-gradient(45deg, #695EE6 0%, #FF7A7A 85%);
26
+ -webkit-background-clip: text;
27
+ -webkit-text-fill-color: transparent;
28
+ }
29
+ #submit{
30
+ position: absolute;
31
+ flex:0 !important;
32
+ width: 120px;
33
+ right: 13px;
34
+ top: 40px;
35
+ border-radius: 0 5px 5px 5px !important;
36
+ }
37
+ #url_input{
38
+ font-size: 20px !important;
39
+ }
40
+ #download_status label{
41
+ font-size: 18px !important;
42
+ }
43
+ #second_row>.gr-form{
44
+ border-top-left-radius: 0px !important;
45
+ border-top-right-radius: 0px !important;
46
+ }
47
+
48
+ #input_row{
49
+ position: relative;
50
+ }
51
+ #url_input_group .gr-form:nth-child(2){
52
+ position:relative
53
+ }
54
+ #url_input textarea{
55
+ font-size: 20px !important;
56
+ }
57
+
58
+ #source_language{
59
+ flex-grow: revert
60
+ }
61
+
62
+ #translate_toggle{
63
+ flex-grow: revert;
64
+ min-width: 200px;
65
+ }
66
+ #translate_toggle label{
67
+ height: 100%;
68
+ }
69
+
70
+ /*
71
+ #translate_toggle{
72
+ position: absolute;
73
+ right: 0;
74
+ width: auto;
75
+ flex: none;
76
+ background: transparent
77
+ }
78
+ */
79
+ .wrap.absolute{
80
+ position: relative !important;
81
+ opacity: 100% !important;
82
+ }
83
+
84
+ #fake_ass_group{
85
+ display:none;
86
+ visibility: hidden;
87
+ position:absolute;
88
+ pointer-events: none;
89
+ }
90
+ #output_text label{
91
+ font-size: 20px !important;
92
+ white-space: pre-wrap;
93
+ }
94
+
95
+ footer{
96
+ display: none !important;
97
+ }