Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,46 +1,48 @@
|
|
1 |
from faster_whisper import WhisperModel
|
2 |
import streamlit as st
|
3 |
-
|
|
|
|
|
4 |
model_size = "metame/faster-distil-whisper-large-v2"
|
5 |
model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
6 |
-
|
|
|
|
|
7 |
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "ogg", "flac"])
|
8 |
-
|
9 |
-
|
10 |
-
"Japanese": "jpn_Jpan",
|
11 |
-
"Hindi": "hin_Deva",
|
12 |
-
"French": "fra_Latn",
|
13 |
-
"Spanish": "spa_Latn",
|
14 |
-
"Chinese": "zho_Hans",
|
15 |
-
"Italian": "ita_Latn",
|
16 |
-
"German": "deu_Latn",
|
17 |
-
"Russian": "rus_Cyrl"
|
18 |
-
}
|
19 |
-
selected_language = st.selectbox("Select Target Language", list(languages.keys()))
|
20 |
-
if st.button("Transcribe and Translate"):
|
21 |
if audio_file is not None:
|
22 |
-
|
|
|
|
|
23 |
f.write(audio_file.getbuffer())
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
st.
|
41 |
-
|
42 |
-
|
43 |
-
st.markdown(f"**
|
|
|
|
|
|
|
|
|
|
|
44 |
else:
|
45 |
st.error("Please upload an audio file.")
|
46 |
|
|
|
|
|
|
1 |
from faster_whisper import WhisperModel
import streamlit as st

import os
import time

# Load the transcription model once at module import (Streamlit re-runs the
# script on every interaction, so keep this cheap-to-reach at top level).
# CPU inference with int8 quantization keeps memory low on small instances.
model_size = "metame/faster-distil-whisper-large-v2"
model = WhisperModel(model_size, device="cpu", compute_type="int8")

# Streamlit UI
st.title("Audio to Text")
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "ogg", "flac"])

if st.button("Transcribe"):
    if audio_file is not None:
        # Persist the upload to disk: WhisperModel.transcribe takes a path.
        # os.path.splitext keeps only a clean extension rather than trusting
        # whatever follows the last '.' in the user-supplied filename.
        suffix = os.path.splitext(audio_file.name)[1] or ".tmp"
        audio_path = "temp_audio" + suffix
        with open(audio_path, "wb") as f:
            f.write(audio_file.getbuffer())

        try:
            # Time the whole transcription pass for display below.
            start_time = time.time()
            segments, info = model.transcribe(audio_path, beam_size=5)

            # Build the transcript with join() instead of repeated `+=`
            # concatenation — avoids quadratic copying on long recordings.
            # Iterating `segments` drives the (lazy) transcription generator.
            transcription = "".join(
                f"**[{segment.start:.2f}s - {segment.end:.2f}s]** {segment.text}\n"
                for segment in segments
            )

            processing_time = time.time() - start_time

            # Display results
            if transcription:
                st.write("Detected language: '{}' with probability {:.2f}".format(info.language, info.language_probability))
                st.subheader("Transcription")
                st.write(transcription)
                st.subheader("Processing Time")
                st.markdown(f"**{processing_time:.2f} seconds**")
            else:
                st.error("No transcription was produced. Please check the audio file and try again.")

        except Exception as e:
            # Top-level UI boundary: surface any model/IO failure to the user
            # instead of crashing the app.
            st.error(f"An error occurred during transcription: {e}")
        finally:
            # Bug fix: the temp file used to be left behind after every run,
            # leaking one file per upload. Always clean it up.
            if os.path.exists(audio_path):
                os.remove(audio_path)
    else:
        st.error("Please upload an audio file.")