Neal Caren committed on
Commit
ee99df3
1 Parent(s): c0d73db

Fixed download button.

Browse files
Files changed (1) hide show
  1. app.py +17 -18
app.py CHANGED
@@ -4,9 +4,15 @@ import whisper
4
  import subprocess
5
  from simple_diarizer.diarizer import Diarizer
6
  import streamlit as st
 
7
 
 
8
 
9
- model_size = 'base'
 
 
 
 
10
 
11
 
12
  def speech_to_text(uploaded):
@@ -84,22 +90,7 @@ def transcribe(uploaded, nu_speakers):
84
 
85
  tdf_cols = ['speaker','start','end','text']
86
  #st.dataframe(tdf[tdf_cols])
87
-
88
- st.download_button(
89
- label="Download transcript as text file",
90
- data='\n'.join(lines),
91
- file_name='transcript.txt',
92
- mime='text/plain',
93
- )
94
-
95
- st.download_button(
96
- label="Download transcript as CSV (with time codes)",
97
- data=tdf[tdf_cols].to_csv( float_format='%.2f', index=False).encode('utf-8'),
98
- file_name='transcript.csv',
99
- mime='text/csv',
100
- )
101
-
102
- return tdf[tdf_cols]
103
 
104
 
105
  descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
@@ -123,4 +114,12 @@ if submit:
123
  bytes_data = uploaded.getvalue()
124
  with open('temp_audio', 'wb') as outfile:
125
  outfile.write(bytes_data)
126
- text_df = transcribe('temp_audio', nu_speakers)
 
 
 
 
 
 
 
 
 
4
  import subprocess
5
  from simple_diarizer.diarizer import Diarizer
6
  import streamlit as st
7
+ import base64
8
 
9
+ model_size = 'tiny'
10
 
11
+
12
+
13
+ def create_download_link(val, filename, label):
14
+ b64 = base64.b64encode(val)
15
+ return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}">{label}</a>'
16
 
17
 
18
  def speech_to_text(uploaded):
 
90
 
91
  tdf_cols = ['speaker','start','end','text']
92
  #st.dataframe(tdf[tdf_cols])
93
+ return {'text':lines, 'df': tdf[tdf_cols]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
 
96
  descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
 
114
  bytes_data = uploaded.getvalue()
115
  with open('temp_audio', 'wb') as outfile:
116
  outfile.write(bytes_data)
117
+ transcript = transcribe('temp_audio', nu_speakers)
118
+
119
+ csv = transcript['df'].to_csv( float_format='%.2f', index=False).encode('utf-8')
120
+ text = '\n'.join(transcript['text']).encode('utf-8')
121
+ download_url = create_download_link(text, 'transcript.txt', 'Download transcript as plain text.')
122
+ st.markdown(download_url, unsafe_allow_html=True)
123
+
124
+ download_url = create_download_link(csv, 'transcript.csv', 'Download transcript as CSV (with time codes)')
125
+ st.markdown(download_url, unsafe_allow_html=True)