emon-j committed on
Commit
3404b92
1 Parent(s): 011f92f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -36
app.py CHANGED
from faster_whisper import WhisperModel
import streamlit as st
import os
from translator import translate

# Load the Whisper model once at import time (CPU, int8 quantization to keep
# memory usage low on shared hosts).
model_size = "metame/faster-distil-whisper-large-v2"
model = WhisperModel(model_size, device="cpu", compute_type="int8")

st.title("Audio to Text Translator")

audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "ogg", "flac"])

# Target-language choices mapped to NLLB-style language codes expected by
# the project's `translate` helper.
languages = {
    "English": "eng_Latn",
    "Japanese": "jpn_Jpan",
    "Hindi": "hin_Deva",
    "French": "fra_Latn",
    "Spanish": "spa_Latn",
    "Chinese": "zho_Hans",
    "Italian": "ita_Latn",
    "German": "deu_Latn",
    "Russian": "rus_Cyrl",
}
selected_language = st.selectbox("Select Target Language", list(languages.keys()))

if st.button("Transcribe and Translate"):
    if audio_file is not None:
        # Fix: keep the uploaded file's real extension instead of hard-coding
        # ".mp3" — a wav/ogg/flac upload was previously mislabeled on disk.
        audio_path = "temp_audio." + audio_file.name.split(".")[-1]
        with open(audio_path, "wb") as f:
            f.write(audio_file.getbuffer())
        try:
            segments, info = model.transcribe(audio_path, beam_size=5)
            st.write("Detected language '%s' with probability %f" % (info.language, info.language_probability))
            col1, col2 = st.columns(2)

            with col1:
                st.subheader("Transcription")

            with col2:
                st.subheader("Translation")

            # Process each segment individually; `segments` is consumed lazily,
            # so decoding happens inside this loop.
            for segment in segments:
                # Translate the segment text into the selected target language.
                translated_text = translate(segment.text, tgt_lang=languages[selected_language])

                with col1:
                    st.markdown(f"**[{segment.start:.2f}s - {segment.end:.2f}s]** {segment.text}")

                with col2:
                    st.markdown(f"**[{segment.start:.2f}s - {segment.end:.2f}s]** {translated_text}")
        finally:
            # Fix: remove the temporary file so repeated runs do not leak
            # one file per transcription.
            if os.path.exists(audio_path):
                os.remove(audio_path)
    else:
        st.error("Please upload an audio file.")
from faster_whisper import WhisperModel
import streamlit as st
import os
import time

# Load the model once at import time (CPU, int8 quantization for low memory).
model_size = "metame/faster-distil-whisper-large-v2"
model = WhisperModel(model_size, device="cpu", compute_type="int8")

# Streamlit UI
st.title("Audio to Text")
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "ogg", "flac"])

if st.button("Transcribe"):
    if audio_file is not None:
        # Save the upload to disk, preserving its original file extension.
        audio_path = "temp_audio." + audio_file.name.split(".")[-1]
        with open(audio_path, "wb") as f:
            f.write(audio_file.getbuffer())

        try:
            # Time the full transcription pass; `segments` is a lazy generator,
            # so the loop below is where the actual decoding work happens.
            start_time = time.time()
            segments, info = model.transcribe(audio_path, beam_size=5)

            # Build the transcript with join() instead of repeated `+=`
            # (avoids quadratic string concatenation on long audio).
            lines = [
                f"**[{segment.start:.2f}s - {segment.end:.2f}s]** {segment.text}\n"
                for segment in segments
            ]
            transcription = "".join(lines)

            processing_time = time.time() - start_time

            # Display results
            if transcription:
                st.write("Detected language: '{}' with probability {:.2f}".format(info.language, info.language_probability))
                st.subheader("Transcription")
                st.write(transcription)
                st.subheader("Processing Time")
                st.markdown(f"**{processing_time:.2f} seconds**")
            else:
                st.error("No transcription was produced. Please check the audio file and try again.")

        except Exception as e:
            # Top-level UI boundary: surface any failure to the user instead
            # of letting Streamlit show a raw traceback.
            st.error(f"An error occurred during transcription: {e}")
        finally:
            # Fix: delete the temporary file so repeated runs do not leak
            # one `temp_audio.*` file per transcription.
            if os.path.exists(audio_path):
                os.remove(audio_path)
    else:
        st.error("Please upload an audio file.")