dwb2023 committed on
Commit
6b09585
1 Parent(s): 741549e

Update app.py

Browse files

use quantized model

Files changed (1) hide show
  1. app.py +9 -12
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import yt_dlp as youtube_dl
3
- from transformers import pipeline, BitsAndBytesConfig
4
  from transformers.pipelines.audio_utils import ffmpeg_read
5
  import torch
6
  from huggingface_hub import CommitScheduler
@@ -12,24 +12,21 @@ from datetime import datetime
12
  from pathlib import Path
13
  from uuid import uuid4
14
  from functools import lru_cache
15
- import bitsandbytes as bnb
16
 
17
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
18
 
19
- MODEL_NAME = "openai/whisper-large-v3"
20
  BATCH_SIZE = 8
21
  YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
22
 
23
  device = 0 if torch.cuda.is_available() else "cpu"
24
 
25
- # Load model with bitsandbytes quantization
26
- quantization_config = BitsAndBytesConfig(
27
- load_in_4bit=True,
28
- bnb_4bit_compute_dtype=torch.float16,
29
- )
30
 
31
- # bnb_config = bnb.QuantizationConfig(bits=4)
32
- pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device, quantization_config=quantization_config)
33
 
34
  # Define paths and create directory if not exists
35
  JSON_DATASET_DIR = Path("json_dataset")
@@ -52,7 +49,7 @@ def _return_yt_html_embed(yt_url):
52
  )
53
  return HTML_str
54
 
55
- @spaces.GPU(duration=120)
56
  @lru_cache(maxsize=10)
57
  def transcribe_audio(inputs, task):
58
  if inputs is None:
@@ -75,7 +72,7 @@ def download_yt_audio(yt_url, filename):
75
  with youtube_dl.YoutubeDL(ydl_opts) as ydl:
76
  ydl.download([yt_url])
77
 
78
- @spaces.GPU(duration=120)
79
  @lru_cache(maxsize=10)
80
  def yt_transcribe(yt_url, task):
81
  with tempfile.TemporaryDirectory() as tmpdirname:
 
1
  import gradio as gr
2
  import yt_dlp as youtube_dl
3
+ from transformers import pipeline, WhisperForConditionalGeneration, WhisperTokenizer
4
  from transformers.pipelines.audio_utils import ffmpeg_read
5
  import torch
6
  from huggingface_hub import CommitScheduler
 
12
  from pathlib import Path
13
  from uuid import uuid4
14
  from functools import lru_cache
 
15
 
16
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
17
 
18
+ MODEL_NAME = "dwb2023/whisper-large-v3-quantized"
19
  BATCH_SIZE = 8
20
  YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
21
 
22
  device = 0 if torch.cuda.is_available() else "cpu"
23
 
24
+ # Load the model
25
+ model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
26
+ tokenizer = WhisperTokenizer.from_pretrained(MODEL_NAME)
 
 
27
 
28
+ # Initialize the pipeline with the quantized model
29
+ pipe = pipeline(task="automatic-speech-recognition", model=model, tokenizer=tokenizer, chunk_length_s=30, device=device)
30
 
31
  # Define paths and create directory if not exists
32
  JSON_DATASET_DIR = Path("json_dataset")
 
49
  )
50
  return HTML_str
51
 
52
+ @spaces.GPU
53
  @lru_cache(maxsize=10)
54
  def transcribe_audio(inputs, task):
55
  if inputs is None:
 
72
  with youtube_dl.YoutubeDL(ydl_opts) as ydl:
73
  ydl.download([yt_url])
74
 
75
+ @spaces.GPU
76
  @lru_cache(maxsize=10)
77
  def yt_transcribe(yt_url, task):
78
  with tempfile.TemporaryDirectory() as tmpdirname: