dwb2023 committed on
Commit
bf63579
·
verified ·
1 Parent(s): ce2a906

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -12,15 +12,19 @@ from datetime import datetime
12
  from pathlib import Path
13
  from uuid import uuid4
14
  from functools import lru_cache
 
15
 
16
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
17
 
18
- MODEL_NAME = "openai/whisper-medium"
19
  BATCH_SIZE = 8
20
  YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
21
 
22
  device = 0 if torch.cuda.is_available() else "cpu"
23
- pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device)
 
 
 
24
 
25
  # Define paths and create directory if not exists
26
  JSON_DATASET_DIR = Path("json_dataset")
@@ -103,7 +107,6 @@ def yt_transcribe2(yt_url, task, max_filesize=75.0):
103
 
104
  return html_embed_str, text
105
 
106
-
107
  demo = gr.Blocks()
108
 
109
  yt_transcribe_interface = gr.Interface(
 
12
  from pathlib import Path
13
  from uuid import uuid4
14
  from functools import lru_cache
15
+ import bitsandbytes as bnb
16
 
17
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
18
 
19
+ MODEL_NAME = "openai/whisper-large-v3"
20
  BATCH_SIZE = 8
21
  YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
22
 
23
  device = 0 if torch.cuda.is_available() else "cpu"
24
+
25
+ # Load model with bitsandbytes quantization
26
+ bnb_config = bnb.QuantizationConfig(bits=4)
27
+ pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device, quantization_config=bnb_config)
28
 
29
  # Define paths and create directory if not exists
30
  JSON_DATASET_DIR = Path("json_dataset")
 
107
 
108
  return html_embed_str, text
109
 
 
110
  demo = gr.Blocks()
111
 
112
  yt_transcribe_interface = gr.Interface(