Spaces:
Runtime error
Runtime error
Neal Caren
committed on
Commit
•
1ac8b8b
1
Parent(s):
9908ddd
Multilingual.
Browse files
app.py
CHANGED
@@ -14,15 +14,9 @@ def create_download_link(val, filename, label):
|
|
14 |
return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}">{label}</a>'
|
15 |
|
16 |
|
17 |
-
def speech_to_text(uploaded):
|
18 |
-
st.write(f'Using {model_size} model.')
|
19 |
-
model = whisper.load_model(model_size)
|
20 |
-
result = model.transcribe(uploaded,verbose=True)
|
21 |
-
return f'You said: {result["text"]}'
|
22 |
-
|
23 |
def segment(nu_speakers):
|
24 |
|
25 |
-
diar = Diarizer(embed_model='
|
26 |
segments = diar.diarize('mono.wav', num_speakers=nu_speakers)
|
27 |
|
28 |
sdf = pd.DataFrame(segments)
|
@@ -38,7 +32,8 @@ def audio_to_df(uploaded):
|
|
38 |
monotize(uploaded)
|
39 |
model = whisper.load_model(model_size)
|
40 |
result = model.transcribe('mono.wav',verbose=True,
|
41 |
-
without_timestamps=False
|
|
|
42 |
tdf = pd.DataFrame(result['segments'])
|
43 |
return tdf
|
44 |
|
@@ -99,6 +94,8 @@ descript = ("This web app creates transcripts using OpenAI's [Whisper](https://g
|
|
99 |
"* You can upload an audio or video file of up to 200MBs.\n"
|
100 |
"* Creating the transcript takes some time. "
|
101 |
"The process takes approximately 20% of the length of the audio file using the base Whisper model.\n "
|
|
|
|
|
102 |
"* After uploading the file, be sure to select the number of speakers." )
|
103 |
|
104 |
st.title("Automated Transcription")
|
@@ -110,7 +107,7 @@ nu_speakers = form.slider('Number of speakers in recording:', min_value=1, max_v
|
|
110 |
models = form.selectbox(
|
111 |
'Which Whisper model?',
|
112 |
('Tiny (fast)', 'Base (good)', 'Small (great but slow)'), index=1)
|
113 |
-
|
114 |
submit = form.form_submit_button("Transcribe!")
|
115 |
|
116 |
|
@@ -122,6 +119,11 @@ if submit:
|
|
122 |
elif models == 'Small (great but slow)':
|
123 |
model_size = 'small'
|
124 |
|
|
|
|
|
|
|
|
|
|
|
125 |
bytes_data = uploaded.getvalue()
|
126 |
with open('temp_audio', 'wb') as outfile:
|
127 |
outfile.write(bytes_data)
|
|
|
14 |
return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}">{label}</a>'
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def segment(nu_speakers):
|
18 |
|
19 |
+
diar = Diarizer(embed_model='ecapa',cluster_method='sc')
|
20 |
segments = diar.diarize('mono.wav', num_speakers=nu_speakers)
|
21 |
|
22 |
sdf = pd.DataFrame(segments)
|
|
|
32 |
monotize(uploaded)
|
33 |
model = whisper.load_model(model_size)
|
34 |
result = model.transcribe('mono.wav',verbose=True,
|
35 |
+
without_timestamps=False,
|
36 |
+
task = task)
|
37 |
tdf = pd.DataFrame(result['segments'])
|
38 |
return tdf
|
39 |
|
|
|
94 |
"* You can upload an audio or video file of up to 200MBs.\n"
|
95 |
"* Creating the transcript takes some time. "
|
96 |
"The process takes approximately 20% of the length of the audio file using the base Whisper model.\n "
|
97 |
+
"* The transcription process handles a variety of languages, and can also translate the audio to English. The tiny model is not good at translating. \n"
|
98 |
+
"* Speaker segmentation seems to work best with the base model. The small model produces better transcripts, but something seems off with the timecodes, degrading the speaker attribution. \n"
|
99 |
"* After uploading the file, be sure to select the number of speakers." )
|
100 |
|
101 |
st.title("Automated Transcription")
|
|
|
107 |
models = form.selectbox(
|
108 |
'Which Whisper model?',
|
109 |
('Tiny (fast)', 'Base (good)', 'Small (great but slow)'), index=1)
|
110 |
+
translate = form.checkbox('Translate to English?')
|
111 |
submit = form.form_submit_button("Transcribe!")
|
112 |
|
113 |
|
|
|
119 |
elif models == 'Small (great but slow)':
|
120 |
model_size = 'small'
|
121 |
|
122 |
+
if translate == True:
|
123 |
+
task = 'translate'
|
124 |
+
else:
|
125 |
+
task = 'transcribe'
|
126 |
+
|
127 |
bytes_data = uploaded.getvalue()
|
128 |
with open('temp_audio', 'wb') as outfile:
|
129 |
outfile.write(bytes_data)
|