Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -12,15 +12,19 @@ from datetime import datetime
|
|
12 |
from pathlib import Path
|
13 |
from uuid import uuid4
|
14 |
from functools import lru_cache
|
|
|
15 |
|
16 |
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
17 |
|
18 |
-
MODEL_NAME = "openai/whisper-
|
19 |
BATCH_SIZE = 8
|
20 |
YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
|
21 |
|
22 |
device = 0 if torch.cuda.is_available() else "cpu"
|
23 |
-
|
|
|
|
|
|
|
24 |
|
25 |
# Define paths and create directory if not exists
|
26 |
JSON_DATASET_DIR = Path("json_dataset")
|
@@ -103,7 +107,6 @@ def yt_transcribe2(yt_url, task, max_filesize=75.0):
|
|
103 |
|
104 |
return html_embed_str, text
|
105 |
|
106 |
-
|
107 |
demo = gr.Blocks()
|
108 |
|
109 |
yt_transcribe_interface = gr.Interface(
|
|
|
12 |
from pathlib import Path
|
13 |
from uuid import uuid4
|
14 |
from functools import lru_cache
|
15 |
+
import bitsandbytes as bnb
|
16 |
|
17 |
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
18 |
|
19 |
+
MODEL_NAME = "openai/whisper-large-v3"
|
20 |
BATCH_SIZE = 8
|
21 |
YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
|
22 |
|
23 |
device = 0 if torch.cuda.is_available() else "cpu"
|
24 |
+
|
25 |
+
# Load model with bitsandbytes quantization
|
26 |
+
bnb_config = bnb.QuantizationConfig(bits=4)
|
27 |
+
pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device, quantization_config=bnb_config)
|
28 |
|
29 |
# Define paths and create directory if not exists
|
30 |
JSON_DATASET_DIR = Path("json_dataset")
|
|
|
107 |
|
108 |
return html_embed_str, text
|
109 |
|
|
|
110 |
demo = gr.Blocks()
|
111 |
|
112 |
yt_transcribe_interface = gr.Interface(
|