knowledge-scribe

Running on Zero

App Files Files Community

dwb2023 committed on Jun 10, 2024

Commit

6b09585

verified ·

1 Parent(s): 741549e

Update app.py

Browse files

use quantized model

Files changed (1) hide show

app.py +9 -12

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import yt_dlp as youtube_dl
-from transformers import pipeline, BitsAndBytesConfig
 from transformers.pipelines.audio_utils import ffmpeg_read
 import torch
 from huggingface_hub import CommitScheduler
@@ -12,24 +12,21 @@ from datetime import datetime
 from pathlib import Path
 from uuid import uuid4
 from functools import lru_cache
-import bitsandbytes as bnb
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-MODEL_NAME = "openai/whisper-large-v3"
 BATCH_SIZE = 8
 YT_LENGTH_LIMIT_S = 4800  # 1 hour 20 minutes
 device = 0 if torch.cuda.is_available() else "cpu"
-# Load model with bitsandbytes quantization
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-)
-# bnb_config = bnb.QuantizationConfig(bits=4)
-pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device, quantization_config=quantization_config)
 # Define paths and create directory if not exists
 JSON_DATASET_DIR = Path("json_dataset")
@@ -52,7 +49,7 @@ def _return_yt_html_embed(yt_url):
     )
     return HTML_str
-@spaces.GPU(duration=120)
 @lru_cache(maxsize=10)
 def transcribe_audio(inputs, task):
     if inputs is None:
@@ -75,7 +72,7 @@ def download_yt_audio(yt_url, filename):
     with youtube_dl.YoutubeDL(ydl_opts) as ydl:
         ydl.download([yt_url])
-@spaces.GPU(duration=120)
 @lru_cache(maxsize=10)
 def yt_transcribe(yt_url, task):
     with tempfile.TemporaryDirectory() as tmpdirname:

 import gradio as gr
 import yt_dlp as youtube_dl
+from transformers import pipeline, WhisperForConditionalGeneration, WhisperTokenizer
 from transformers.pipelines.audio_utils import ffmpeg_read
 import torch
 from huggingface_hub import CommitScheduler
 from pathlib import Path
 from uuid import uuid4
 from functools import lru_cache
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+MODEL_NAME = "dwb2023/whisper-large-v3-quantized"
 BATCH_SIZE = 8
 YT_LENGTH_LIMIT_S = 4800  # 1 hour 20 minutes
 device = 0 if torch.cuda.is_available() else "cpu"
+# Load the model
+model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
+tokenizer = WhisperTokenizer.from_pretrained(MODEL_NAME)
+# Initialize the pipeline with the quantized model
+pipe = pipeline(task="automatic-speech-recognition", model=model, tokenizer=tokenizer, chunk_length_s=30, device=device)
 # Define paths and create directory if not exists
 JSON_DATASET_DIR = Path("json_dataset")
     )
     return HTML_str
+@spaces.GPU
 @lru_cache(maxsize=10)
 def transcribe_audio(inputs, task):
     if inputs is None:
     with youtube_dl.YoutubeDL(ydl_opts) as ydl:
         ydl.download([yt_url])
+@spaces.GPU
 @lru_cache(maxsize=10)
 def yt_transcribe(yt_url, task):
     with tempfile.TemporaryDirectory() as tmpdirname: