cadasme committed
Commit
ff5f53a
1 Parent(s): eb3cdc8

first commit

Files changed (2)
  1. app.py +50 -62
  2. requirements.txt +1 -3
app.py CHANGED
@@ -1,14 +1,17 @@
- # Import the required libraries
  import streamlit as st
+ import os
+ import tempfile
  import whisper
  import speech_recognition as sr
  from pydub import AudioSegment
- import os
- import sounddevice as sd
- import numpy as np
- from scipy.io.wavfile import write
- import os
+ from audio_recorder_streamlit import audio_recorder

+ # Function to convert mp3 file to wav
+ def convert_mp3_to_wav(mp3_path):
+     audio = AudioSegment.from_mp3(mp3_path)
+     wav_path = mp3_path.replace('.mp3', '.wav')
+     audio.export(wav_path, format="wav")
+     return wav_path

  # Function to transcribe audio using OpenAI Whisper
  def transcribe_whisper(model_name, file_path):
@@ -24,86 +27,71 @@ def transcribe_speech_recognition(file_path):
          audio = r.record(source)

      try:
-         result = r.recognize_google(audio, language='es')
+         result = r.recognize_google(audio, language='spanish')
          return result
      except sr.UnknownValueError:
          return "No se pudo reconocer ningún texto en el audio."

- # Function to convert mp3 file to wav
- def convert_mp3_to_wav(mp3_path):
-     audio = AudioSegment.from_mp3(mp3_path)
-     wav_path = mp3_path.replace('.mp3', '.wav')
-     audio.export(wav_path, format="wav")
-     return wav_path
-
- # Function to record audio
- def record_audio(filename, duration):
-     fs = 44100  # Sample rate
-     channels = 2  # Number of channels (1 for mono, 2 for stereo)
-
-     # Start recording
-     recording = sd.rec(int(duration * fs), samplerate=fs, channels=channels)
-     sd.wait()  # Wait until recording is finished
-     # Create temp directory if it doesn't exist
-     if not os.path.exists(os.path.dirname(filename)):
-         os.makedirs(os.path.dirname(filename))
-
-     # Save as WAV file
-     write(filename, fs, recording)
+ # Function to perform transcription based on selected method
+ def perform_transcription(transcription_method, model_name, audio_path):
+     if transcription_method == 'OpenAI Whisper':
+         return transcribe_whisper(model_name, audio_path)
+     else:
+         return transcribe_speech_recognition(audio_path)
+
+ # Function to handle uploaded file transcription
+ def handle_uploaded_file(uploaded_file, transcription_method, model_name):
+     file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size}
+     st.write(file_details)
+
+     # Save uploaded file to temp directory
+     os.makedirs("temp", exist_ok=True)  # Create temp directory if it doesn't exist
+     file_path = os.path.join("temp", uploaded_file.name)
+     with open(file_path, "wb") as f:
+         f.write(uploaded_file.getbuffer())
+
+     with st.spinner('Transcribiendo...'):
+         if uploaded_file.name.endswith('.mp3') and transcription_method != 'OpenAI Whisper':
+             # Convert mp3 to wav if Google Speech API is selected and file is in mp3 format
+             file_path = convert_mp3_to_wav(file_path)
+
+         # Perform transcription
+         transcript = perform_transcription(transcription_method, model_name, file_path)
+
+         st.text_area('Resultado de la Transcripción:', transcript, height=200)

  def main():
      st.title('Transcriptor de Audio')

      # Choose the transcription method and model
+     option = st.selectbox('Escoger Modelo de Transcripción', ('Subir un archivo', 'Grabar audio en tiempo real'))
      transcription_method = st.selectbox('Escoge el método de transcripción', ('OpenAI Whisper', 'Google Speech API'))
      if transcription_method == 'OpenAI Whisper':
          model_name = st.selectbox('Escoge el modelo de Whisper', ('base', 'small', 'medium', 'large', 'tiny'))

-     option = st.selectbox('Escoge la opción', ('Subir un archivo', 'Grabar audio en tiempo real'))
-
      if option == 'Subir un archivo':
          uploaded_file = st.file_uploader("Sube tu archivo de audio para transcribir", type=['wav', 'mp3'])

          if uploaded_file is not None:
-             file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size}
-             st.write(file_details)
-
-             # Save uploaded file to temp directory
-             file_path = os.path.join("temp", uploaded_file.name)
-             with open(file_path, "wb") as f:
-                 f.write(uploaded_file.getbuffer())
-
-             st.write("Archivo de audio cargado correctamente. Transcribiendo...")
-             with st.spinner('Transcribiendo...'):
-                 if uploaded_file.name.endswith('.mp3') and transcription_method != 'OpenAI Whisper':
-                     # Convert mp3 to wav if Google Speech API is selected and file is in mp3 format
-                     file_path = convert_mp3_to_wav(file_path)
-
-                 # Perform transcription
-                 if transcription_method == 'OpenAI Whisper':
-                     transcript = transcribe_whisper(model_name, file_path)
-                 else:
-                     transcript = transcribe_speech_recognition(file_path)
-
-                 st.text_area('Resultado de la Transcripción:', transcript, height=200)
+             handle_uploaded_file(uploaded_file, transcription_method, model_name)

      elif option == 'Grabar audio en tiempo real':
-         duration = st.slider("Selecciona la duración de la grabación (segundos)", 1, 10, 5)
-         start_recording = st.button('Empezar a grabar')
-
-         if start_recording:
-             filename = "temp/recorded_audio.wav"
-             st.write("Grabación en progreso...")
-             with st.spinner('Grabando...'):
-                 record_audio(filename, duration)
-
+         duration = 5
+         # duration = st.slider("Selecciona la duración de la grabación (segundos)", 1, 10, 5)
+         # st.write("Duración de la grabación:", duration, "segundos")
+
+         audio_bytes = audio_recorder(pause_threshold=duration, sample_rate=16_000)
+
+         if audio_bytes:
              st.write("Grabación finalizada. Transcribiendo...")
              with st.spinner('Transcribiendo...'):
+                 # Save recorded audio to a temporary file
+                 with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio:
+                     temp_path = temp_audio.name
+                     temp_audio.write(audio_bytes)
+
                  # Perform transcription
-                 if transcription_method == 'OpenAI Whisper':
-                     transcript = transcribe_whisper(model_name, filename)
-                 else:
-                     transcript = transcribe_speech_recognition(filename)
+                 transcript = perform_transcription(transcription_method, model_name, temp_path)

                  st.text_area('Resultado de la Transcripción:', transcript, height=200)
requirements.txt CHANGED
@@ -1,6 +1,4 @@
- sounddevice
- numpy
- scipy
+ audio-recorder-streamlit
  pydub
  streamlit
  python-dotenv
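pydub remains in requirements.txt, presumably because the Google Speech API path still relies on the convert_mp3_to_wav helper: speech_recognition's AudioFile reader accepts WAV/AIFF/FLAC but not MP3. A minimal sketch of that path, assuming the parts of transcribe_speech_recognition outside the hunks follow the usual Recognizer/AudioFile pattern (the 'es' language code mirrors the pre-change code; the commit itself switches it to 'spanish'):

import speech_recognition as sr
from pydub import AudioSegment

# Convert an MP3 upload to WAV so speech_recognition can read it (same helper as in the diff).
def convert_mp3_to_wav(mp3_path):
    audio = AudioSegment.from_mp3(mp3_path)
    wav_path = mp3_path.replace('.mp3', '.wav')
    audio.export(wav_path, format="wav")
    return wav_path

# Assumed shape of transcribe_speech_recognition; only the record/recognize lines appear in the hunks.
def transcribe_speech_recognition(file_path):
    r = sr.Recognizer()
    with sr.AudioFile(file_path) as source:  # AudioFile handles WAV/AIFF/FLAC, not MP3
        audio = r.record(source)
    try:
        return r.recognize_google(audio, language='es')
    except sr.UnknownValueError:
        return "No se pudo reconocer ningún texto en el audio."

# Hypothetical usage:
# print(transcribe_speech_recognition(convert_mp3_to_wav("temp/ejemplo.mp3")))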