knowledge-scribe

Running on Zero

dwb2023 committed on Jun 9, 2024

Commit

bf63579

verified ·

1 Parent(s): ce2a906

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,15 +12,19 @@ from datetime import datetime
 from pathlib import Path
 from uuid import uuid4
 from functools import lru_cache
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-MODEL_NAME = "openai/whisper-medium"
 BATCH_SIZE = 8
 YT_LENGTH_LIMIT_S = 4800  # 1 hour 20 minutes
 device = 0 if torch.cuda.is_available() else "cpu"
-pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device)
 # Define paths and create directory if not exists
 JSON_DATASET_DIR = Path("json_dataset")
@@ -103,7 +107,6 @@ def yt_transcribe2(yt_url, task, max_filesize=75.0):
     return html_embed_str, text
 demo = gr.Blocks()
 yt_transcribe_interface = gr.Interface(

 from pathlib import Path
 from uuid import uuid4
 from functools import lru_cache
+import bitsandbytes as bnb
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+MODEL_NAME = "openai/whisper-large-v3"
 BATCH_SIZE = 8
 YT_LENGTH_LIMIT_S = 4800  # 1 hour 20 minutes
 device = 0 if torch.cuda.is_available() else "cpu"
+# Load model with bitsandbytes quantization
+bnb_config = bnb.QuantizationConfig(bits=4)
+pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device, quantization_config=bnb_config)
 # Define paths and create directory if not exists
 JSON_DATASET_DIR = Path("json_dataset")
     return html_embed_str, text
 demo = gr.Blocks()
 yt_transcribe_interface = gr.Interface(