Neal Caren committed on
Commit
014d79d
1 Parent(s): fd2a8f0
Files changed (1) hide show
  1. app.py +5 -87
app.py CHANGED
@@ -6,89 +6,10 @@ from simple_diarizer.diarizer import Diarizer
6
  import streamlit as st
7
 
8
 
9
- '''
10
- def speech_to_text(uploaded):
11
- model = whisper.load_model('tiny')
12
- result = model.transcribe(uploaded,verbose=True)
13
- return f'You said: {result["text"]}'
14
-
15
- def segment(nu_speakers):
16
-
17
- diar = Diarizer(embed_model='xvec',cluster_method='sc')
18
- segments = diar.diarize('mono.wav', num_speakers=nu_speakers)
19
-
20
- sdf = pd.DataFrame(segments)
21
-
22
- # reorganize so the first speaker is always speaker 1
23
- speaker_s = sdf['label'].drop_duplicates().reset_index()['label']
24
- speaker_d = dict((v,k+1) for k,v in speaker_s.items())
25
-
26
- sdf['speaker'] = sdf['label'].replace(speaker_d)
27
- return sdf
28
-
29
- def audio_to_df(uploaded):
30
- monotize(uploaded)
31
- model = whisper.load_model('tiny')
32
- result = model.transcribe('mono.wav',verbose=True,
33
- without_timestamps=False)
34
- tdf = pd.DataFrame(result['segments'])
35
- return tdf
36
-
37
- def monotize(uploaded):
38
- cmd = f"ffmpeg -y -i {uploaded} -acodec pcm_s16le -ar 16000 -ac 1 mono.wav"
39
- subprocess.Popen(cmd, shell=True).wait()
40
-
41
- def add_preface(row):
42
- text = row['text'].replace('\n','')
43
- speaker = row['speaker']
44
- return f'Speaker {speaker}: {text}'
45
-
46
- def transcribe(uploaded, nu_speakers):
47
-
48
- monotize(uploaded)
49
- tdf = audio_to_df(uploaded)
50
- sdf = segment(nu_speakers)
51
-
52
- ns_list = sdf[['start','speaker']].to_dict(orient='records')
53
-
54
- # Find the nearest transcript line to the start of each speaker
55
- for row in ns_list:
56
- input = row['start']
57
- id = tdf.iloc[(tdf['start']-input).abs().argsort()[:1]]['id'].values[0]
58
- tdf.loc[tdf['id'] ==id, 'speaker'] = row['speaker']
59
-
60
- tdf['speaker'].fillna(method = 'ffill', inplace = True)
61
- tdf['speaker'].fillna(method = 'bfill', inplace = True)
62
-
63
- tdf['n1'] = tdf['speaker'] != tdf['speaker'].shift(1)
64
- tdf['speach'] = tdf['n1'].cumsum()
65
- binned_df = tdf.groupby(['speach', 'speaker'])['text'].apply('\n'.join).reset_index()
66
-
67
- binned_df['speaker'] = binned_df['speaker'].astype(int)
68
- binned_df['output'] = binned_df.apply(add_preface, axis=1)
69
-
70
- lines = []
71
- for row in binned_df['output'].values:
72
- st.write(row)
73
- lines.append(row)
74
-
75
- return '\n'.join(lines)
76
-
77
-
78
- descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
79
- "audio files combined with [Chau](https://github.com/cvqluu)'s [Simple Diarizer](https://github.com/cvqluu/simple_diarizer) "
80
- "to partition the text by speaker.\n"
81
- "* Creating the transcript takes some time. "
82
- "Using the default base transcription model, the process takes approximately 20% of the length of the audio file.\n "
83
- "* There seems to be cap on the uploaded file size of about 20MBs. My [colab](https://colab.research.google.com/drive/18AD-mb3bT4s8k3UNhZu-ghPq2DT5il3V?usp=sharing) version "
84
- "can handle any file size, but requies some Python knowledge.\n"
85
- "* After uploading the file, **be sure to select the number of speakers**." )
86
-
87
- '''
88
- with st.form(key='my_form'):
89
- uploaded = st.file_uploader("Choose a file")
90
- nu_speakers = st.slider('Number of speakers in audio file:', min_value=1, max_value=6, value=2, step=1)
91
- submit = st.form_submit_button("Transcribe!")
92
 
93
 
94
  if submit:
@@ -97,7 +18,4 @@ if submit:
97
  outfile.write(bytes_data)
98
  #st.write('Converting audio file.')
99
  #monotize('temp_audio')
100
- text = transcribe('temp_audio', nu_speakers)
101
-
102
-
103
- # To read file as bytes:
 
6
  import streamlit as st
7
 
8
 
9
+ form = st.form(key='my_form')
10
+ uploaded = form.file_uploader("Choose a file")
11
+ nu_speakers = form.slider('Number of speakers in audio file:', min_value=1, max_value=6, value=2, step=1)
12
+ submit = form.form_submit_button("Transcribe!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  if submit:
 
18
  outfile.write(bytes_data)
19
  #st.write('Converting audio file.')
20
  #monotize('temp_audio')
21
+ #text = transcribe('temp_audio', nu_speakers)