Abbas0786 committed on
Commit
6d4cee8
1 Parent(s): 7e54b28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -8
app.py CHANGED
@@ -1,18 +1,34 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
  import torch
 
 
4
  import io
5
 
6
  # Load the ASR pipeline with Whisper model
7
  pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def transcribe_audio(audio_file):
10
- # Load audio file
11
- audio_bytes = audio_file.read()
12
- audio = io.BytesIO(audio_bytes)
13
 
14
  # Transcribe audio
15
- transcription = pipe(audio)
16
  return transcription['text']
17
 
18
  # Streamlit UI
@@ -22,7 +38,10 @@ st.write("Upload an audio file to transcribe its content into text.")
22
  uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "flac"])
23
 
24
  if uploaded_file is not None:
25
- with st.spinner("Transcribing..."):
26
- text = transcribe_audio(uploaded_file)
27
- st.subheader("Transcription Result:")
28
- st.write(text)
 
 
 
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  import torch
4
+ import numpy as np
5
+ from pydub import AudioSegment
6
  import io
7
 
8
  # Load the ASR pipeline with Whisper model
9
  pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
10
 
11
def audio_to_numpy(audio_file):
    """Decode an uploaded audio file into a mono 16 kHz float32 waveform.

    Args:
        audio_file: Binary file-like object exposing ``read()``, holding audio
            in a format that ``AudioSegment.from_file`` can decode.

    Returns:
        A 1-D ``numpy.float32`` array of samples scaled to ``[-1.0, 1.0)``.
    """
    # Decode from an in-memory buffer so no temporary file is needed.
    segment = AudioSegment.from_file(io.BytesIO(audio_file.read()))

    # Downmix to mono and resample to 16 kHz before extracting the samples.
    segment = segment.set_channels(1).set_frame_rate(16000)

    pcm = np.array(segment.get_array_of_samples())

    # Normalise by the full-scale magnitude of the signed PCM width
    # (2**(bits-1), e.g. 32768 for 16-bit audio). Do not use
    # np.iinfo(<bit count>): given a plain int it reports the limits of the
    # platform integer type, which scales every sample to almost zero.
    full_scale = float(1 << (8 * segment.sample_width - 1))
    return pcm.astype(np.float32) / np.float32(full_scale)
25
+
26
  def transcribe_audio(audio_file):
27
+ # Convert audio bytes to numpy array
28
+ audio_numpy = audio_to_numpy(audio_file)
 
29
 
30
  # Transcribe audio
31
+ transcription = pipe(audio_numpy)
32
  return transcription['text']
33
 
34
  # Streamlit UI
 
38
  uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "flac"])
39
 
40
  # Once a file has been uploaded, transcribe it and display the resulting
  # text. Any exception raised along the way is reported in the page through
  # st.error rather than being allowed to propagate.
  if uploaded_file is not None:
41
+ try:
42
+ with st.spinner("Transcribing..."):
43
+ text = transcribe_audio(uploaded_file)
44
+ st.subheader("Transcription Result:")
45
+ st.write(text)
46
+ except Exception as e:
47
+ st.error(f"An error occurred: {e}")