pritamdeka committed on
Commit
4eaea04
1 Parent(s): 4206062

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -11
app.py CHANGED
@@ -1,14 +1,9 @@
1
  import gradio as gr
2
- import torch
3
  import whisper
4
  import librosa
5
- from transformers import pipeline
6
 
7
- # Check if DistilWhisper is available on Hugging Face
8
- # This is a placeholder model name, update it with an actual distillation model if available
9
- # distil_whisper_model = "huggingface/distil-whisper-model"
10
-
11
- # If no distil version, load smaller Whisper model for speed (e.g., "base" or "tiny")
12
  model = whisper.load_model("tiny")
13
 
14
  # Chunking function to split the audio into smaller parts (e.g., 5-second chunks)
@@ -38,16 +33,15 @@ def transcribe_audio_in_chunks(audio_file):
38
  if audio_file is None:
39
  return "No audio file provided."
40
 
41
- # Check the audio file path
42
- if not os.path.exists(audio_file):
43
- return "The audio file does not exist or is inaccessible."
44
-
45
  # Chunk the audio into 5-second parts
46
  chunks, sr = chunk_audio(audio_file, chunk_size=5)
47
 
48
  # Process each chunk and append the results as real-time transcription
49
  transcription = ""
50
  for i, chunk in enumerate(chunks):
 
 
 
51
  # Transcribe each chunk
52
  result = model.transcribe(chunk)
53
  transcription += f"Chunk {i + 1}: {result['text']}\n"
 
1
  import gradio as gr
 
2
  import whisper
3
  import librosa
4
+ import numpy as np
5
 
6
+ # Load Whisper model (using tiny for faster performance)
 
 
 
 
7
  model = whisper.load_model("tiny")
8
 
9
  # Chunking function to split the audio into smaller parts (e.g., 5-second chunks)
 
33
  if audio_file is None:
34
  return "No audio file provided."
35
 
 
 
 
 
36
  # Chunk the audio into 5-second parts
37
  chunks, sr = chunk_audio(audio_file, chunk_size=5)
38
 
39
  # Process each chunk and append the results as real-time transcription
40
  transcription = ""
41
  for i, chunk in enumerate(chunks):
42
+ # Convert the chunk into the correct format for Whisper (numpy array of floats)
43
+ chunk = np.array(chunk)
44
+
45
  # Transcribe each chunk
46
  result = model.transcribe(chunk)
47
  transcription += f"Chunk {i + 1}: {result['text']}\n"