Spaces:

NealCaren
/

transcript

Runtime error

App Files Files Community

Neal Caren committed on Sep 29, 2022

Commit

014d79d

•

1 Parent(s): fd2a8f0

shrunk

Browse files

Files changed (1) hide show

app.py +5 -87

app.py CHANGED Viewed

@@ -6,89 +6,10 @@ from simple_diarizer.diarizer import Diarizer
 import streamlit as st
-'''
-def speech_to_text(uploaded):
-    model = whisper.load_model('tiny')
-    result = model.transcribe(uploaded,verbose=True)
-    return f'You said: {result["text"]}'
-def segment(nu_speakers):
-    diar = Diarizer(embed_model='xvec',cluster_method='sc')
-    segments = diar.diarize('mono.wav', num_speakers=nu_speakers)
-    sdf = pd.DataFrame(segments)
-    # reorganize so the first speaker is always speaker 1
-    speaker_s = sdf['label'].drop_duplicates().reset_index()['label']
-    speaker_d = dict((v,k+1) for k,v in speaker_s.items())
-    sdf['speaker'] = sdf['label'].replace(speaker_d)
-    return sdf
-def audio_to_df(uploaded):
-    monotize(uploaded)
-    model = whisper.load_model('tiny')
-    result = model.transcribe('mono.wav',verbose=True,
-                          without_timestamps=False)
-    tdf = pd.DataFrame(result['segments'])
-    return tdf
-def monotize(uploaded):
-    cmd = f"ffmpeg -y -i {uploaded} -acodec pcm_s16le -ar 16000 -ac 1 mono.wav"
-    subprocess.Popen(cmd, shell=True).wait()
-def add_preface(row):
-    text = row['text'].replace('\n','')
-    speaker = row['speaker']
-    return f'Speaker {speaker}: {text}'
-def transcribe(uploaded, nu_speakers):
-    monotize(uploaded)
-    tdf = audio_to_df(uploaded)
-    sdf = segment(nu_speakers)
-    ns_list = sdf[['start','speaker']].to_dict(orient='records')
-    # Find the nearest transcript line to the start of each speaker
-    for row in ns_list:
-        input = row['start']
-        id = tdf.iloc[(tdf['start']-input).abs().argsort()[:1]]['id'].values[0]
-        tdf.loc[tdf['id'] ==id, 'speaker'] = row['speaker']
-    tdf['speaker'].fillna(method = 'ffill', inplace = True)
-    tdf['speaker'].fillna(method = 'bfill', inplace = True)
-    tdf['n1'] = tdf['speaker'] != tdf['speaker'].shift(1)
-    tdf['speach'] = tdf['n1'].cumsum()
-    binned_df = tdf.groupby(['speach', 'speaker'])['text'].apply('\n'.join).reset_index()
-    binned_df['speaker'] = binned_df['speaker'].astype(int)
-    binned_df['output'] = binned_df.apply(add_preface, axis=1)
-    lines = []
-    for row in binned_df['output'].values:
-        st.write(row)
-        lines.append(row)
-    return '\n'.join(lines)
-descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
-            "audio files combined with [Chau](https://github.com/cvqluu)'s [Simple Diarizer](https://github.com/cvqluu/simple_diarizer) "
-            "to partition the text by speaker.\n"
-            "* Creating the transcript takes some time. "
-            "Using the default base transcription model, the process takes approximately 20% of the length of the audio file.\n "
-            "* There seems to be cap on the uploaded file size of about 20MBs. My [colab](https://colab.research.google.com/drive/18AD-mb3bT4s8k3UNhZu-ghPq2DT5il3V?usp=sharing) version "
-            "can handle any file size, but requies some Python knowledge.\n"
-            "* After uploading the file, **be sure to select the number of speakers**." )
-'''
-with st.form(key='my_form'):
-    uploaded = st.file_uploader("Choose a file")
-    nu_speakers = st.slider('Number of speakers in audio file:', min_value=1, max_value=6, value=2, step=1)
-    submit = st.form_submit_button("Transcribe!")
 if submit:
@@ -97,7 +18,4 @@ if submit:
         outfile.write(bytes_data)
     #st.write('Converting audio file.')
     #monotize('temp_audio')
-    text = transcribe('temp_audio', nu_speakers)
-    # To read file as bytes:

 import streamlit as st
+form = st.form(key='my_form')
+uploaded = form.file_uploader("Choose a file")
+nu_speakers = form.slider('Number of speakers in audio file:', min_value=1, max_value=6, value=2, step=1)
+submit = form.form_submit_button("Transcribe!")
 if submit:
         outfile.write(bytes_data)
     #st.write('Converting audio file.')
     #monotize('temp_audio')
+    #text = transcribe('temp_audio', nu_speakers)