tomriddle ayaanzaveri committed on
Commit
4af4e17
0 Parent(s):

Duplicate from ayaanzaveri/faster-whisper-api

Browse files

Co-authored-by: Ayaan Zaveri <ayaanzaveri@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +71 -0
  4. requirements.txt +10 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Faster Whisper Api
3
+ emoji: ⚡
4
+ colorFrom: blue
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.23.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: ayaanzaveri/faster-whisper-api
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pathlib
2
+ from faster_whisper import WhisperModel
3
+ import yt_dlp
4
+ import uuid
5
+ import os
6
+ import gradio as gr
7
+ from tqdm import tqdm
8
+
9
+ # List of all supported video sites here https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md
10
def download_convert_video_to_audio(
    yt_dlp,
    video_url: str,
    destination_path: pathlib.Path,
) -> None:
    """Download *video_url* with yt-dlp and extract its audio track to ``<destination_path>.mp3``.

    Args:
        yt_dlp: the ``yt_dlp`` module itself (injected as a parameter, so it
            can be replaced by a stub in tests).
        video_url: URL of the video to download; any yt-dlp supported site.
        destination_path: output path *without* extension — ffmpeg appends
            ``.mp3``. NOTE(review): annotated as ``pathlib.Path`` but callers
            in this file pass plain ``str`` — confirm/loosen the annotation.

    Raises:
        Exception: whatever yt-dlp raises on download or extraction failure,
            re-raised unchanged.
    """
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [
            {  # Extract audio using ffmpeg
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
            }
        ],
        "outtmpl": f"{destination_path}.%(ext)s",
    }
    try:
        print(f"Downloading video from {video_url}")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download(video_url)
        print(f"Downloaded video from {video_url} to {destination_path}")
    except Exception:
        # Bare `raise` preserves the original traceback; the original code's
        # `raise (e)` re-raised the same object but reset the raise site.
        raise
32
+
33
def segment_to_dict(segment):
    """Turn a transcription segment namedtuple into a plain dict.

    The segment's ``words`` field, when present, is itself a list of
    namedtuples; each of those is converted to a dict as well so the
    result is fully JSON-serializable structure-wise.
    """
    as_dict = segment._asdict()
    words = as_dict["words"]
    if words is not None:
        as_dict["words"] = [w._asdict() for w in words]
    return as_dict
38
+
39
def download_video(video_url: str):
    """Download *video_url* as mp3 under a random hex filename.

    Thin convenience wrapper around ``download_convert_video_to_audio``;
    note the generated filename is not returned to the caller.
    """
    random_name = uuid.uuid4().hex
    download_convert_video_to_audio(yt_dlp, video_url, f"{random_name}")
41
+
42
def transcribe_video(video_url: str, word_timestamps: bool = True, model_size: str = "tiny"):
    """Download the audio of *video_url* and transcribe it with faster-whisper.

    Args:
        video_url: URL of the video (any yt-dlp supported site).
        word_timestamps: include per-word timing in each segment dict.
        model_size: Whisper model size (the UI offers tiny/base/small).

    Returns:
        A list of segment dicts (see ``segment_to_dict``).
    """
    print(word_timestamps)
    print("loading model")
    # NOTE: the model is loaded per request; cache per model_size if
    # throughput ever matters.
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    # model = WhisperModel(model_size, device="cuda", compute_type="float16")
    print("getting hex")
    rand_id = uuid.uuid4().hex
    audio_path = f"{rand_id}.mp3"
    print("doing download")
    download_convert_video_to_audio(yt_dlp, video_url, f"{rand_id}")
    try:
        segments, info = model.transcribe(audio_path, beam_size=5, word_timestamps=word_timestamps)
        # transcribe() returns a lazy generator that streams from the file,
        # so it must be fully materialized BEFORE the file is deleted.
        segments = [segment_to_dict(segment) for segment in segments]
        print(info)
    finally:
        # Always remove the temp audio file — the original leaked it when
        # transcription raised.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
    print(segments)
    return segments
59
+
60
+ # print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
61
+
62
+ # for segment in segments:
63
+ # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
64
+
65
# Gradio UI: URL + options in, raw segment text out.
demo = gr.Interface(
    fn=transcribe_video,
    inputs=[
        gr.Textbox(label="Video URL"),
        gr.Checkbox(label="Word Timestamps", info="Do you want word timestamps in the response?"),
        gr.Dropdown(label="Model", value="tiny", choices=["tiny", "base", "small"]),
    ],
    outputs="text",
)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.74.*
2
+ requests==2.27.*
3
+ sentencepiece==0.1.*
4
+ torch==1.11.*
5
+ transformers==4.*
6
+ uvicorn[standard]==0.17.*
7
+ faster-whisper==0.3.0
8
+ yt-dlp==2023.3.4
9
+ ffmpeg-python==0.2.0
10
+ gradio