baglada kazuk committed on
Commit
d583248
0 Parent(s):

Duplicate from kazuk/youtube-whisper-10

Browse files

Co-authored-by: Kazuki Nakayashiki <kazuk@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +66 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Youtube Whisper
3
+ emoji: ⚡
4
+ colorFrom: green
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.16.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: unknown
11
+ duplicated_from: kazuk/youtube-whisper-10
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ from pytube import YouTube
4
+
5
def get_audio(url):
    """Download the audio-only stream of a YouTube video.

    Takes the first audio-only stream pytube reports for the video and
    saves it locally, returning the path of the downloaded file.
    """
    video = YouTube(url)
    audio_stream = video.streams.filter(only_audio=True)[0]
    return audio_stream.download(filename="tmp.mp4")
8
+
9
def get_transcript(url, model_size, lang, format):
    """Download a YouTube video's audio and transcribe it with Whisper.

    Args:
        url: YouTube video URL.
        model_size: Whisper model name (e.g. "tiny", "base").
        lang: language name from Whisper's language list, or the literal
            string "None" to let Whisper auto-detect the language.
        format: ".srt" to return SubRip text with timestamps; anything
            else (normally "None") returns plain transcription text.

    Returns:
        The transcription as a single string (plain text or .srt).
    """
    model = whisper.load_model(model_size)

    # The Gradio dropdown supplies the literal string "None" for
    # auto-detect; translate it into the real None whisper expects.
    if lang == "None":
        lang = None

    # fp16=False — presumably to run on CPU-only hardware; TODO confirm.
    result = model.transcribe(get_audio(url), fp16=False, language=lang)

    if format == ".srt":
        return format_to_srt(result["segments"])
    # Fall back to plain text for "None" and any unrecognized format value.
    # (The original returned None for unknown formats, blanking the output.)
    return result["text"]
23
def format_to_srt(segments):
    """Render Whisper transcription segments as SubRip (.srt) text.

    Each segment becomes a numbered entry: index line, timestamp range
    line, then the segment text followed by a blank line.
    """
    entries = []
    for index, segment in enumerate(segments, start=1):
        start = format_timestamp(segment['start'])
        end = format_timestamp(segment['end'])
        entries.append(f"{index}\n{start} --> {end}\n{segment['text']}\n\n")
    return "".join(entries)

def format_timestamp(t):
    """Format a time offset in seconds as an SRT timestamp HH:MM:SS,mmm."""
    hours, remainder = divmod(t, 3600)
    minutes, seconds = divmod(remainder, 60)
    millis = (t - int(t)) * 1000
    return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(millis):03d}"
37
+
38
+
39
# "None" sentinel first so the language dropdown defaults to auto-detect.
# (sorted() accepts any iterable; the redundant list() wrapper is dropped.)
langs = ["None"] + sorted(whisper.tokenizer.LANGUAGES.values())
# Available model checkpoint names. NOTE(review): whisper._MODELS is a
# private attribute — may break on a whisper upgrade; verify.
model_sizes = list(whisper._MODELS.keys())

with gr.Blocks() as demo:

    with gr.Row():

        with gr.Column():

            with gr.Row():
                url = gr.Textbox(placeholder='Youtube video URL', label='URL')

            with gr.Row():
                # Renamed the choices list to model_sizes so the Dropdown
                # component no longer shadows it.
                model_size = gr.Dropdown(choices=model_sizes, value='tiny', label="Model")
                lang = gr.Dropdown(choices=langs, value="None", label="Language (Optional)")
                format = gr.Dropdown(choices=["None", ".srt"], value="None", label="Timestamps? (Optional)")

            with gr.Row():
                gr.Markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")
                transcribe_btn = gr.Button('Transcribe')

        with gr.Column():
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')

    # Wire the button: component values are passed positionally to
    # get_transcript(url, model_size, lang, format).
    transcribe_btn.click(get_transcript, inputs=[url, model_size, lang, format], outputs=outputs)

demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers
2
+ pytube
3
+ git+https://github.com/openai/whisper.git