Spaces:

NealCaren
/

transcript

Runtime error

App Files Files Community

Neal Caren committed on Sep 30, 2022

Commit

1ac8b8b

•

1 Parent(s): 9908ddd

Multilingual.

Browse files

Files changed (1) hide show

app.py +11 -9

app.py CHANGED Viewed

@@ -14,15 +14,9 @@ def create_download_link(val, filename, label):
     return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}">{label}</a>'
-def speech_to_text(uploaded):
-    st.write(f'Using {model_size} model.')
-    model = whisper.load_model(model_size)
-    result = model.transcribe(uploaded,verbose=True)
-    return f'You said: {result["text"]}'
 def segment(nu_speakers):
-    diar = Diarizer(embed_model='xvec',cluster_method='sc')
     segments = diar.diarize('mono.wav', num_speakers=nu_speakers)
     sdf = pd.DataFrame(segments)
@@ -38,7 +32,8 @@ def audio_to_df(uploaded):
     monotize(uploaded)
     model = whisper.load_model(model_size)
     result = model.transcribe('mono.wav',verbose=True,
-                          without_timestamps=False)
     tdf = pd.DataFrame(result['segments'])
     return tdf
@@ -99,6 +94,8 @@ descript = ("This web app creates transcripts using OpenAI's [Whisper](https://g
             "* You can upload an audio or video file of up to 200MBs.\n"
             "* Creating the transcript takes some time. "
             "The process takes approximately 20% of the length of the audio file using the base Whisper model.\n "
             "* After uploading the file, be sure to select the number of speakers." )
 st.title("Automated Transcription")
@@ -110,7 +107,7 @@ nu_speakers = form.slider('Number of speakers in recording:', min_value=1, max_v
 models = form.selectbox(
     'Which Whisper model?',
     ('Tiny (fast)', 'Base (good)', 'Small (great but slow)'), index=1)
 submit = form.form_submit_button("Transcribe!")
@@ -122,6 +119,11 @@ if submit:
     elif models == 'Small (great but slow)':
         model_size = 'small'
     bytes_data = uploaded.getvalue()
     with open('temp_audio', 'wb') as outfile:
         outfile.write(bytes_data)

     return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}">{label}</a>'
 def segment(nu_speakers):
+    diar = Diarizer(embed_model='ecapa',cluster_method='sc')
     segments = diar.diarize('mono.wav', num_speakers=nu_speakers)
     sdf = pd.DataFrame(segments)
     monotize(uploaded)
     model = whisper.load_model(model_size)
     result = model.transcribe('mono.wav',verbose=True,
+                          without_timestamps=False,
+                           task = task)
     tdf = pd.DataFrame(result['segments'])
     return tdf
             "* You can upload an audio or video file of up to 200MBs.\n"
             "* Creating the transcript takes some time. "
             "The process takes approximately 20% of the length of the audio file using the base Whisper model.\n "
+            "* The transcription process handles a variety of languages, and can also translate the audio to English. The tiny model is not good at translating. \n"
+            "* Speaker segmentation seems to work best with the base model. The small model produces better transcripts, but something seems off with the timecodes, degrading the speaker attribution. \n"
             "* After uploading the file, be sure to select the number of speakers." )
 st.title("Automated Transcription")
 models = form.selectbox(
     'Which Whisper model?',
     ('Tiny (fast)', 'Base (good)', 'Small (great but slow)'), index=1)
+translate = form.checkbox('Translate to English?')
 submit = form.form_submit_button("Transcribe!")
     elif models == 'Small (great but slow)':
         model_size = 'small'
+    if translate == True:
+        task = 'translate'
+    else:
+        task = 'transcribe'
     bytes_data = uploaded.getvalue()
     with open('temp_audio', 'wb') as outfile:
         outfile.write(bytes_data)