kobakhit committed
Commit b79e42d • 1 Parent(s): c4254a4

ai chat, state logic

Files changed (1):
  1. app.py +173 -61
app.py CHANGED
@@ -25,6 +25,13 @@ st.set_page_config(
    page_icon = '🌊'
)

def create_audio_stream(audio):
    return io.BytesIO(audio.export(format="wav").read())

@@ -59,31 +66,62 @@ def youtube_video_id(value):
    # fail?
    return None


def load_rttm_file(rttm_path):
    return load_rttm(rttm_path)['stream']

-
def load_audio(uploaded_audio):
    return AudioSegment.from_file(uploaded_audio)


- # Set your OpenAI, Hugging Face API keys
- openai.api_key = os.getenv('openai')
- hf_api_key = os.getenv('hf')

- st.title("Speech Diarization and Speech-to-Text with PyAnnote and Whisper")
reddit_thread = 'https://www.reddit.com/r/dataisbeautiful/comments/17413bq/oc_speech_diarization_app_that_transcribes_audio'
with st.expander('About', expanded=True):
    st.markdown(f'''
    Given an audio file this app will
    - [x] 1. Identify and diarize the speakers using `pyannote` [HuggingFace Speaker Diarization api](https://huggingface.co/pyannote/speaker-diarization-3.0)
    - [x] 2. Transcribe the audio and attribute it to speakers using the [OpenAI Whisper API](https://platform.openai.com/docs/guides/speech-to-text/quickstart)
-   - [ ] 3. Set up an LLM chat with the transcript loaded into its knowledge database, so that a user can "talk" to the transcript of the audio file (WIP)

    This version will only process up to the first 6 minutes of an audio file due to the limited resources of Streamlit.io apps.
    A local version with access to a GPU can process 1 hour of audio in 1 to 5 minutes.
-   If you would like to use this app at scale, reach out directly by creating an issue on GitHub [🤖](https://github.com/KobaKhit/speech-to-text-app/issues)!

    Rule of thumb: for this Streamlit.io-hosted app, processing takes about half the duration of the audio, e.g. a 6 minute YouTube video will take 3 minutes to diarize.

@@ -95,10 +133,13 @@ option = st.radio("Select source:", ["Upload an audio file", "Use YouTube link",

# Upload audio file
if option == "Upload an audio file":
-   uploaded_audio = st.file_uploader("Upload an audio file (MP3 or WAV)", type=["mp3", "wav","mp4"])
    with st.expander('Optional Parameters'):
-       rttm = st.file_uploader("Upload .rttm if you already have one", type=["rttm"])
-       transcript_file = st.file_uploader("Upload transcript json", type=["json"])
        youtube_link = st.text_input('Youtube link of the audio sample')

    if uploaded_audio is not None:
@@ -110,32 +151,23 @@ if option == "Upload an audio file":
        # audio = audio.set_frame_rate(sample_rate)

# use youtube link
- elif option == "Use YouTube link":
-
-   youtube_link_raw = st.text_input("Enter the YouTube video URL:")
-   youtube_link = f'https://youtu.be/{youtube_video_id(youtube_link_raw)}'

-   with st.expander('Optional Parameters'):
-       rttm = st.file_uploader("Upload .rttm if you already have one", type=["rttm"])
-       transcript_file = st.file_uploader("Upload transcript json", type=["json"])
    if youtube_link_raw:
-       st.write(f"Fetching audio from YouTube: {youtube_link}")
-       try:
-           yt = YouTube(youtube_link)
-           audio_stream = yt.streams.filter(only_audio=True).first()
-           audio_name = audio_stream.default_filename
-           st.write(f"Downloaded {audio_name}")
-       except pytube.exceptions.AgeRestrictedError:
-           st.stop('Age restricted videos cannot be processed.')
-
-       try:
-           os.remove('sample.mp4')
-       except OSError:
-           pass
-       audio_file = audio_stream.download(filename='sample.mp4')
-       time.sleep(2)
-       audio = load_audio('sample.mp4')
-       st.audio(create_audio_stream(audio), format="audio/mp4", start_time=0)
        # sample_rate = st.number_input("Enter the sample rate of the audio", min_value=8000, max_value=48000)
        # audio = audio.set_frame_rate(sample_rate)
        # except Exception as e:

@@ -143,7 +175,7 @@ elif option == "Use YouTube link":
elif option == 'See Example':
    youtube_link = 'https://www.youtube.com/watch?v=TamrOZX9bu8'
    audio_name = 'Stephen A. Smith has JOKES with Shannon Sharpe'
-   st.write(f'Loaded audio file from {youtube_link} - Stephen A. Smith has JOKES with Shannon Sharpe 👏😂')
    if os.path.isfile('example/steve a smith jokes.mp4'):
        audio = load_audio('example/steve a smith jokes.mp4')
    else:

@@ -154,14 +186,13 @@ elif option == 'See Example':
        audio = load_audio('sample.mp4')

    if os.path.isfile("example/steve a smith jokes.rttm"):
-       rttm = "example/steve a smith jokes.rttm"
    if os.path.isfile('example/steve a smith jokes.json'):
-       transcript_file = 'example/steve a smith jokes.json'

    st.audio(create_audio_stream(audio), format="audio/mp4", start_time=0)


-
# Diarize
if "audio" in locals():
    st.write('Performing Diarization...')

@@ -182,9 +213,9 @@ if "audio" in locals():
        pipeline.to(torch.device('cuda'))

    # run the pipeline on an audio file
-   if 'rttm' in locals() and rttm != None:
-       st.write(f'Loading {rttm}')
-       diarization = load_rttm_file(rttm)
    else:
        # with ProgressHook() as hook:
        audio_ = create_audio_stream(audio)

@@ -193,6 +224,7 @@ if "audio" in locals():
        # dump the diarization output to disk using RTTM format
        with open(f'{audio_name.split(".")[0]}.rttm', "w") as f:
            diarization.write_rttm(f)

        # Display the diarization results
        st.write("Diarization Results:")

@@ -211,7 +243,7 @@ if "audio" in locals():
        temp = {'speaker': speaker,
                'start': turn.start, 'end': turn.end, 'duration': turn.end-turn.start,
                'audio': audio[turn.start*1000:turn.end*1000]}
-       if 'transcript_file' in locals() and transcript_file == None:
            temp['audio_stream'] = create_audio_stream(audio[turn.start*1000:turn.end*1000])
        sp_chunks.append(temp)

@@ -224,7 +256,7 @@ if "audio" in locals():
    st.pyplot(figure)

    st.write('Speakers and Audio Samples')
-   with st.expander('Samples', expanded=True):
        for speaker in set(s['speaker'] for s in sp_chunks):
            temp = max(filter(lambda d: d['speaker'] == speaker, sp_chunks), key=lambda x: x['duration'])
            speak_time = sum(c['duration'] for c in filter(lambda d: d['speaker'] == speaker, sp_chunks))

@@ -240,32 +272,36 @@ if "audio" in locals():

    st.divider()
    # # Perform transcription with Whisper ASR
    st.write('Transcribing using Whisper API (150 requests limit)...')
-   container = st.container()

-   limit = 150
-   progress_text = f"Processing 1/{len(sp_chunks[:limit])}..."
    my_bar = st.progress(0, text=progress_text)
    with st.expander('Transcript', expanded=True):
-       if 'transcript_file' in locals() and transcript_file != None:
-           with open(transcript_file,'r') as f:
                sp_chunks_loaded = json.load(f)
            for i,s in enumerate(sp_chunks_loaded):
                if s['transcript'] != None:
-                   transcript_summary = f"{s['speaker']} start={float(s['start']):.1f}s end={float(s['end']):.1f}s: {s['transcript']}"
-                   if youtube_link != None:
                        transcript_summary += f" {add_query_parameter(youtube_link, {'t':str(int(s['start']))})}"

-                   st.write(transcript_summary)
                    progress_text = f"Processing {i+1}/{len(sp_chunks_loaded)}..."
                    my_bar.progress((i+1)/len(sp_chunks_loaded), text=progress_text)

            transcript_json = sp_chunks_loaded
-           transcript_path = f'example-transcript.json'

        else:
            sp_chunks_updated = []
-           for i,s in enumerate(sp_chunks[:limit]):
                if s['duration'] > 0.1:
                    audio_path = s['audio'].export('temp.wav',format='wav')
                    try:

@@ -276,7 +312,7 @@ if "audio" in locals():

                    if transcript !='' and transcript != None:
                        s['transcript'] = transcript
-                       transcript_summary = f"{s['speaker']} start={s['start']:.1f}s end={s['end']:.1f}s : {s['transcript']}"
                        if youtube_link != None:
                            transcript_summary += f" {add_query_parameter(youtube_link, {'t':str(int(s['start']))})}"

@@ -284,27 +320,101 @@ if "audio" in locals():
                                'start':s['start'], 'end':s['end'],
                                'duration': s['duration'],'transcript': transcript})

-                   progress_text = f"Processing {i+1}/{len(sp_chunks[:limit])}..."
-                   my_bar.progress((i+1)/len(sp_chunks[:limit]), text=progress_text)
-                   st.write(transcript_summary)

            transcript_json = [dict((k, d[k]) for k in ['speaker','start','end','duration','transcript'] if k in d) for d in sp_chunks_updated]
-           transcript_path = f'{audio_name.split(".")[0]}-transcript.json'

            with open(transcript_path,'w') as f:
                json.dump(transcript_json, f)

-   with container:
        st.info(f'Completed transcribing')

        @st.cache_data
        def convert_df(string):
            # IMPORTANT: Cache the conversion to prevent computation on every rerun
            return string.encode('utf-8')
-
        transcript_json_download = convert_df(json.dumps(transcript_json))
-
        c1_b,c2_b = st.columns((1,2))
        with c1_b:
            ste.download_button(
                "Download transcript as json",

@@ -312,10 +422,12 @@ if "audio" in locals():
                transcript_path,
            )

            header = ','.join(transcript_json[0].keys()) + '\n'
            for s in transcript_json:
                header += ','.join([str(e) if ',' not in str(e) else '"' + str(e) + '"' for e in s.values()]) + '\n'

            transcript_csv_download = convert_df(header)
            with c2_b:
                ste.download_button(
 
    page_icon = '🌊'
)

+ # Set your OpenAI, Hugging Face API keys
+ openai.api_key = st.secrets['openai']
+ hf_api_key = st.secrets['hf']
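+ # (st.secrets reads the keys from the app's secret store; in a local run
+ #  the same names would live in .streamlit/secrets.toml, e.g. openai = "sk-...")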
+
+ TRANSCRIPTION_REQUEST_LIMIT = 150
+ PROMPT_REQUEST_LIMIT = 2
+
def create_audio_stream(audio):
    return io.BytesIO(audio.export(format="wav").read())

    # fail?
    return None

+ @st.cache_data
+ def process_youtube_link(youtube_link):
+     st.write(f"Fetching audio from YouTube: {youtube_link}")
+     try:
+         yt = YouTube(youtube_link)
+         audio_stream = yt.streams.filter(only_audio=True).first()
+         audio_name = audio_stream.default_filename
+         st.write(f"Downloaded {audio_name}")
+     except pytube.exceptions.AgeRestrictedError:
+         st.stop('Age restricted videos cannot be processed.')
+
+     try:
+         os.remove('sample.mp4')
+     except OSError:
+         pass
+     audio_file = audio_stream.download(filename='sample.mp4')
+     time.sleep(2)
+     audio = load_audio('sample.mp4')
+     st.audio(create_audio_stream(audio), format="audio/mp4", start_time=0)
+     return audio, audio_name

+ @st.cache_data
def load_rttm_file(rttm_path):
    return load_rttm(rttm_path)['stream']

+ @st.cache_resource
def load_audio(uploaded_audio):
    return AudioSegment.from_file(uploaded_audio)

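+ # (st.cache_data memoizes by argument and hands back a copy of the result,
+ #  so a repeated link or rttm path skips the download/parse; st.cache_resource
+ #  keeps one live AudioSegment shared across reruns instead of copying it)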

+ if "openai_model" not in st.session_state:
+     st.session_state["openai_model"] = "gpt-3.5-turbo"
+
+ if "prompt_request_counter" not in st.session_state:
+     st.session_state["prompt_request_counter"] = 0
+
+ initial_prompt = [{"role": "system", "content": "You are helping to analyze and summarize a transcript of a conversation."},
+                   {"role": 'user', "content": 'Please summarize briefly the following transcript\n{}'}]
+ if "messages" not in st.session_state:
+     st.session_state.messages = initial_prompt
+
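+ # (Streamlit re-executes the whole script on every interaction; only
+ #  st.session_state survives a rerun, so the guards above seed the model
+ #  name, prompt counter, and chat history exactly once per session)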
+
+ st.title("Speech to Chat")
reddit_thread = 'https://www.reddit.com/r/dataisbeautiful/comments/17413bq/oc_speech_diarization_app_that_transcribes_audio'
with st.expander('About', expanded=True):
    st.markdown(f'''
    Given an audio file this app will
    - [x] 1. Identify and diarize the speakers using `pyannote` [HuggingFace Speaker Diarization api](https://huggingface.co/pyannote/speaker-diarization-3.0)
    - [x] 2. Transcribe the audio and attribute it to speakers using the [OpenAI Whisper API](https://platform.openai.com/docs/guides/speech-to-text/quickstart)
+   - [x] 3. Set up an LLM chat with the transcript loaded into its knowledge database, so that a user can "talk" to the transcript of the audio file

    This version will only process up to the first 6 minutes of an audio file due to the limited resources of Streamlit.io apps.
    A local version with access to a GPU can process 1 hour of audio in 1 to 5 minutes.
+   If you would like to use this app at scale, reach out directly by creating an issue on [GitHub 🤖](https://github.com/KobaKhit/speech-to-text-app/issues)!

    Rule of thumb: for this Streamlit.io-hosted app, processing takes about half the duration of the audio, e.g. a 6 minute YouTube video will take 3 minutes to diarize.



# Upload audio file
if option == "Upload an audio file":
+   with st.form('uploaded-file', clear_on_submit=True):
+       uploaded_audio = st.file_uploader("Upload an audio file (MP3 or WAV)", type=["mp3", "wav","mp4"])
+       if st.form_submit_button(): # reset chat on new upload
+           st.session_state.messages = initial_prompt
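+       # (st.form batches the uploader value until Submit is pressed, so the
+       #  app does not rerun on every widget change; submitting also starts a
+       #  fresh chat history for the new file)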
    with st.expander('Optional Parameters'):
+       # st.session_state.rttm = st.file_uploader("Upload .rttm if you already have one", type=["rttm"])
+       # st.session_state.transcript_file = st.file_uploader("Upload transcript json", type=["json"])
        youtube_link = st.text_input('Youtube link of the audio sample')

    if uploaded_audio is not None:

        # audio = audio.set_frame_rate(sample_rate)

# use youtube link
+ elif option == "Use YouTube link":
+
+   with st.form('youtube-link', clear_on_submit=True):
+       youtube_link_raw = st.text_input("Enter the YouTube video URL:")
+       youtube_link = f'https://youtu.be/{youtube_video_id(youtube_link_raw)}'
+
+       if st.form_submit_button(): # reset variables on new link submit
+           st.session_state.messages = initial_prompt
+           st.session_state.rttm = None
+           st.session_state.transcript_file = None
+           st.session_state.prompt_request_counter = 0
+
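+       # (clearing rttm and transcript_file above forces a fresh diarization
+       #  and transcription pass for the new link instead of reusing the
+       #  previous file's artifacts)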
+   # with st.expander('Optional Parameters'):
+   #     st.session_state.rttm = st.file_uploader("Upload .rttm if you already have one", type=["rttm"])
+   #     st.session_state.transcript_file = st.file_uploader("Upload transcript json", type=["json"])
    if youtube_link_raw:
+       audio, audio_name = process_youtube_link(youtube_link)
        # sample_rate = st.number_input("Enter the sample rate of the audio", min_value=8000, max_value=48000)
        # audio = audio.set_frame_rate(sample_rate)
        # except Exception as e:

elif option == 'See Example':
    youtube_link = 'https://www.youtube.com/watch?v=TamrOZX9bu8'
    audio_name = 'Stephen A. Smith has JOKES with Shannon Sharpe'
+   st.write(f'Loaded audio file from {youtube_link} - {audio_name} 👏😂')
    if os.path.isfile('example/steve a smith jokes.mp4'):
        audio = load_audio('example/steve a smith jokes.mp4')
    else:

        audio = load_audio('sample.mp4')

    if os.path.isfile("example/steve a smith jokes.rttm"):
+       st.session_state.rttm = "example/steve a smith jokes.rttm"
    if os.path.isfile('example/steve a smith jokes.json'):
+       st.session_state.transcript_file = 'example/steve a smith jokes.json'

    st.audio(create_audio_stream(audio), format="audio/mp4", start_time=0)


# Diarize
if "audio" in locals():
    st.write('Performing Diarization...')

        pipeline.to(torch.device('cuda'))

    # run the pipeline on an audio file
+   if 'rttm' in st.session_state and st.session_state.rttm != None:
+       st.write(f'Loading {st.session_state.rttm}')
+       diarization = load_rttm_file(st.session_state.rttm)
    else:
        # with ProgressHook() as hook:
        audio_ = create_audio_stream(audio)

        # dump the diarization output to disk using RTTM format
        with open(f'{audio_name.split(".")[0]}.rttm', "w") as f:
            diarization.write_rttm(f)
+       st.session_state.rttm = f'{audio_name.split(".")[0]}.rttm'

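+       # (an .rttm file is plain text with one "SPEAKER <file> 1 <onset>
+       #  <duration> <NA> <NA> <label> <NA> <NA>" line per speech turn;
+       #  keeping its path in session_state lets later reruns skip diarization)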
    # Display the diarization results
    st.write("Diarization Results:")

        temp = {'speaker': speaker,
                'start': turn.start, 'end': turn.end, 'duration': turn.end-turn.start,
                'audio': audio[turn.start*1000:turn.end*1000]}
+       if 'transcript_file' in st.session_state and st.session_state.transcript_file == None:
            temp['audio_stream'] = create_audio_stream(audio[turn.start*1000:turn.end*1000])
        sp_chunks.append(temp)


    st.pyplot(figure)

    st.write('Speakers and Audio Samples')
+   with st.expander('Samples', expanded=False):
        for speaker in set(s['speaker'] for s in sp_chunks):
            temp = max(filter(lambda d: d['speaker'] == speaker, sp_chunks), key=lambda x: x['duration'])
            speak_time = sum(c['duration'] for c in filter(lambda d: d['speaker'] == speaker, sp_chunks))


    st.divider()
    # # Perform transcription with Whisper ASR
+
+
+   # Transcript containers
+   container_transcript_chat = st.container()
    st.write('Transcribing using Whisper API (150 requests limit)...')
+   container_transcript_completed = st.container()

+   progress_text = f"Processing 1/{len(sp_chunks[:TRANSCRIPTION_REQUEST_LIMIT])}..."
    my_bar = st.progress(0, text=progress_text)
+   # rework the loop. Simplify if Else
    with st.expander('Transcript', expanded=True):
+       if 'transcript_file' in st.session_state and st.session_state.transcript_file != None:
+           with open(st.session_state.transcript_file,'r') as f:
                sp_chunks_loaded = json.load(f)
            for i,s in enumerate(sp_chunks_loaded):
                if s['transcript'] != None:
+                   transcript_summary = f"**{s['speaker']}** start={float(s['start']):.1f}s end={float(s['end']):.1f}s: {s['transcript']}"
+                   if youtube_link != None and youtube_link != '':
                        transcript_summary += f" {add_query_parameter(youtube_link, {'t':str(int(s['start']))})}"

+                   st.markdown(transcript_summary)
                    progress_text = f"Processing {i+1}/{len(sp_chunks_loaded)}..."
                    my_bar.progress((i+1)/len(sp_chunks_loaded), text=progress_text)

            transcript_json = sp_chunks_loaded
+           transcript_path = f'{audio_name.split(".mp4")[0]}-transcript.json'

        else:
            sp_chunks_updated = []
+           for i,s in enumerate(sp_chunks[:TRANSCRIPTION_REQUEST_LIMIT]):
                if s['duration'] > 0.1:
                    audio_path = s['audio'].export('temp.wav',format='wav')
                    try:

                    if transcript !='' and transcript != None:
                        s['transcript'] = transcript
+                       transcript_summary = f"**{s['speaker']}** start={s['start']:.1f}s end={s['end']:.1f}s : {s['transcript']}"
                        if youtube_link != None:
                            transcript_summary += f" {add_query_parameter(youtube_link, {'t':str(int(s['start']))})}"

                                'start':s['start'], 'end':s['end'],
                                'duration': s['duration'],'transcript': transcript})

+                   progress_text = f"Processing {i+1}/{len(sp_chunks[:TRANSCRIPTION_REQUEST_LIMIT])}..."
+                   my_bar.progress((i+1)/len(sp_chunks[:TRANSCRIPTION_REQUEST_LIMIT]), text=progress_text)
+                   st.markdown(transcript_summary)

            transcript_json = [dict((k, d[k]) for k in ['speaker','start','end','duration','transcript'] if k in d) for d in sp_chunks_updated]
+           transcript_path = f'{audio_name.split(".mp4")[0]}-transcript.json'
+           st.session_state.transcript_file = transcript_path

+       # save the transcript file
        with open(transcript_path,'w') as f:
            json.dump(transcript_json, f)
+
+       # generate transcript string
+       transcript_string = '\n'.join([f"{s['speaker']} start={s['start']:.1f}s end={s['end']:.1f}s : {s['transcript']}" for s in transcript_json])

+   @st.cache_data
+   def get_initial_response(transcript):
+       st.session_state.messages[1]['content'] = st.session_state.messages[1]['content'].format(transcript)
+       initial_response = openai.ChatCompletion.create(
+           model=st.session_state["openai_model"],
+           messages=st.session_state.messages
+       )
+       return initial_response['choices'][0]['message']['content']
+
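+   # (because get_initial_response is cached on its argument, the in-place
+   #  prompt formatting inside it only happens on a cache miss; the caller
+   #  below repeats the substitution so the stored prompt always carries the
+   #  transcript)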
+   # Chat container
+   with container_transcript_chat:
+       # get a summary of the transcript from ChatGPT
+       init = get_initial_response(transcript_string)
+       # pass transcript to initial prompt
+       st.session_state.messages[1]['content'] = st.session_state.messages[1]['content'].format(transcript_string)
+
+       # LLM Chat
+       with st.expander('Summary of the Transcribed Audio File Generated by ChatGPT', expanded = True):
+           # display the AI generated summary.
+           with st.chat_message("assistant", avatar='https://upload.wikimedia.org/wikipedia/commons/0/04/ChatGPT_logo.svg'):
+               st.write(init)
+
+           # chat field
+           with st.form("Chat",clear_on_submit=True):
+               prompt = st.text_input("Chat with the Transcript (2 prompts limit)")
+               st.form_submit_button()
+
+           # message list
+           # for message in st.session_state.messages[2:]:
+           #     with st.chat_message(message["role"]):
+           #         st.markdown(message["content"])
+
+           # make request if prompt was entered
+           if prompt:
+               st.session_state.prompt_request_counter += 1
+               if st.session_state.prompt_request_counter > PROMPT_REQUEST_LIMIT:
+                   st.warning('Exceeded prompt limit.')
+                   st.stop()
+               # append user prompt to messages
+               st.session_state.messages.append({"role": "user", "content": prompt})
+
+               # display user prompt
+               with st.chat_message("user"):
+                   st.markdown(prompt)
+
+               # stream LLM assistant response
+               with st.chat_message("assistant"):
+                   message_placeholder = st.empty()
+                   full_response = ""
+
+                   # stream response
+                   for response in openai.ChatCompletion.create(
+                       model=st.session_state["openai_model"],
+                       messages=[
+                           {"role": m["role"], "content": m["content"]}
+                           for m in st.session_state.messages
+                       ],
+                       stream=True,
+                   ):
+                       full_response += response.choices[0].delta.get("content", "")
+                       message_placeholder.markdown(full_response + "▌")
+                   message_placeholder.markdown(full_response)
+
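+                   # (with stream=True each yielded chunk's .delta carries only
+                   #  the newly generated tokens, so the loop accumulates them
+                   #  and repaints the placeholder with a "▌" cursor to
+                   #  simulate typing)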
+               # append AI response to messages
+               st.session_state.messages.append({"role": "assistant", "content": full_response})
+
+   # Transcription Completed Section
+   with container_transcript_completed:
        st.info(f'Completed transcribing')

        @st.cache_data
        def convert_df(string):
            # IMPORTANT: Cache the conversion to prevent computation on every rerun
            return string.encode('utf-8')
+       # encode transcript string
        transcript_json_download = convert_df(json.dumps(transcript_json))
+       # transcript download buttons
        c1_b,c2_b = st.columns((1,2))
+
+       # json button
        with c1_b:
            ste.download_button(
                "Download transcript as json",

                transcript_path,
            )

+       # create csv string
        header = ','.join(transcript_json[0].keys()) + '\n'
        for s in transcript_json:
            header += ','.join([str(e) if ',' not in str(e) else '"' + str(e) + '"' for e in s.values()]) + '\n'

+       # csv button
        transcript_csv_download = convert_df(header)
        with c2_b:
            ste.download_button(