MiakOnline committed on
Commit
55e1272
·
verified ·
1 Parent(s): 25248e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -137
app.py CHANGED
@@ -4,6 +4,7 @@ import tempfile
4
  import os
5
  import time
6
  import re
 
7
  from pydub import AudioSegment
8
  from openpyxl import Workbook
9
  from openpyxl.styles import Font
@@ -21,14 +22,15 @@ st.set_page_config(
21
  page_icon="🎤"
22
  )
23
 
 
 
 
24
  # ---------------------------------------------------
25
- # SIDEBAR SETTINGS
26
  # ---------------------------------------------------
27
- st.sidebar.title("⚙️ Settings")
28
-
29
  model_option = st.sidebar.selectbox(
30
  "Select Whisper Model",
31
- ["base", "small"]
32
  )
33
 
34
  output_mode = st.sidebar.radio(
@@ -36,185 +38,120 @@ output_mode = st.sidebar.radio(
36
  ["Roman Urdu", "English"]
37
  )
38
 
39
- if st.sidebar.button("🧹 Clear Session"):
40
- st.session_state.clear()
41
- st.rerun()
42
-
43
  # ---------------------------------------------------
44
- # HEADER
45
  # ---------------------------------------------------
46
- st.markdown("<h1 style='text-align:center;'>🎤 RecToText Pro</h1>", unsafe_allow_html=True)
47
- st.markdown("<p style='text-align:center;'>AI Lecture Transcriber with Excel & Word Export</p>", unsafe_allow_html=True)
48
- st.divider()
49
 
50
  # ---------------------------------------------------
51
- # FUNCTIONS
52
  # ---------------------------------------------------
53
-
54
- @st.cache_resource
55
- def load_model(model_size):
56
- return whisper.load_model(model_size)
57
-
58
  def clean_text(text):
59
- filler_words = ["um", "hmm", "acha", "matlab", "uh", "huh"]
60
  pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
61
  text = re.sub(pattern, '', text, flags=re.IGNORECASE)
62
  text = re.sub(r'\s+', ' ', text).strip()
 
63
 
64
- # Better paragraph formatting
65
- sentences = re.split(r'(?<=[.!?]) +', text)
66
- paragraphs = []
67
- temp_para = ""
68
-
69
- for i, sentence in enumerate(sentences):
70
- temp_para += sentence + " "
71
- if (i + 1) % 5 == 0:
72
- paragraphs.append(temp_para.strip())
73
- temp_para = ""
74
-
75
- if temp_para:
76
- paragraphs.append(temp_para.strip())
77
-
78
- return "\n\n".join(paragraphs)
79
-
80
  def convert_to_roman_urdu(text):
81
  replacements = {
82
  "ہے": "hai",
83
  "میں": "main",
84
  "اور": "aur",
85
- "کیا": "kya",
86
- "آپ": "aap",
87
- "کی": "ki",
88
- "کا": "ka"
89
  }
90
  for urdu, roman in replacements.items():
91
  text = text.replace(urdu, roman)
92
  return text
93
 
94
- def create_excel(segments):
 
 
 
95
  wb = Workbook()
96
  ws = wb.active
97
- ws.title = "Transcription"
 
 
98
 
99
- headers = ["Timestamp", "Transcribed Text", "Cleaned Output"]
100
- ws.append(headers)
 
 
101
 
102
- for col in range(1, 4):
103
- ws.cell(row=1, column=col).font = Font(bold=True)
104
-
105
- for seg in segments:
106
- timestamp = f"{round(seg['start'],2)} - {round(seg['end'],2)}"
107
- raw_text = seg["text"]
108
- cleaned = clean_text(raw_text)
109
- ws.append([timestamp, raw_text, cleaned])
110
-
111
- excel_buffer = BytesIO()
112
- wb.save(excel_buffer)
113
- excel_buffer.seek(0)
114
- return excel_buffer
115
-
116
- def create_word_document(cleaned_text):
117
  doc = Document()
 
 
118
 
119
- title = doc.add_heading("Lecture Transcription", level=1)
120
- title.alignment = WD_ALIGN_PARAGRAPH.CENTER
121
-
122
- doc.add_paragraph("")
123
-
124
- paragraphs = cleaned_text.split("\n\n")
125
-
126
- for para in paragraphs:
127
- p = doc.add_paragraph(para)
128
- p.paragraph_format.space_after = Pt(12)
129
-
130
- word_buffer = BytesIO()
131
- doc.save(word_buffer)
132
- word_buffer.seek(0)
133
- return word_buffer
134
 
135
  # ---------------------------------------------------
136
  # FILE UPLOADER
137
  # ---------------------------------------------------
138
  uploaded_file = st.file_uploader(
139
- "Upload Lecture Recording (.mp3, .wav, .m4a, .aac)",
140
  type=["mp3", "wav", "m4a", "aac"]
141
  )
142
 
143
  if uploaded_file:
144
 
145
- st.audio(uploaded_file)
146
-
147
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
148
- file_extension = uploaded_file.name.split(".")[-1]
149
- audio = AudioSegment.from_file(uploaded_file, format=file_extension)
150
- audio.export(tmp.name, format="wav")
151
- temp_audio_path = tmp.name
152
-
153
- st.info("Loading Whisper model...")
154
- model = load_model(model_option)
155
-
156
- start_time = time.time()
157
-
158
- with st.spinner("Transcribing... Please wait."):
159
- result = model.transcribe(temp_audio_path)
160
-
161
- end_time = time.time()
162
- os.remove(temp_audio_path)
163
-
164
- detected_lang = result.get("language", "Unknown")
165
- segments = result["segments"]
166
- full_text = result["text"]
167
 
168
- cleaned_text = clean_text(full_text)
 
 
 
 
169
 
170
- if output_mode == "Roman Urdu":
171
- cleaned_text = convert_to_roman_urdu(cleaned_text)
172
 
173
- word_count = len(cleaned_text.split())
174
- processing_time = round(end_time - start_time, 2)
175
 
176
- col1, col2 = st.columns(2)
177
 
178
- with col1:
179
- st.subheader("📜 Raw Transcription")
180
- st.text_area("", full_text, height=350)
181
 
182
- with col2:
183
- st.subheader("✨ Clean Story Format")
184
- st.text_area("", cleaned_text, height=350)
185
 
186
- st.divider()
187
 
188
- st.write(f"**Detected Language:** {detected_lang}")
189
- st.write(f"**Word Count:** {word_count}")
190
- st.write(f"**Processing Time:** {processing_time} seconds")
191
 
192
- # ----------------------------
193
- # EXPORT FILES
194
- # ----------------------------
195
- excel_file = create_excel(segments)
196
- word_file = create_word_document(cleaned_text)
197
 
198
- colA, colB = st.columns(2)
199
 
200
- with colA:
201
- st.download_button(
202
- label="📥 Download Excel (.xlsx)",
203
- data=excel_file,
204
- file_name="RecToText_Transcription.xlsx",
205
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
206
- )
207
 
208
- with colB:
209
- st.download_button(
210
- label="📄 Download Word (.docx)",
211
- data=word_file,
212
- file_name="RecToText_Lecture.docx",
213
- mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
214
- )
215
 
216
- st.divider()
217
- st.markdown(
218
- "<p style='text-align:center; font-size:12px;'>RecToText Pro | Whisper + Streamlit Powered</p>",
219
- unsafe_allow_html=True
220
- )
 
4
  import os
5
  import time
6
  import re
7
+ import torch
8
  from pydub import AudioSegment
9
  from openpyxl import Workbook
10
  from openpyxl.styles import Font
 
22
  page_icon="🎤"
23
  )
24
 
25
+ st.title("🎤 RecToText Pro")
26
+ st.caption("Stable Production Version | CPU Optimized")
27
+
28
  # ---------------------------------------------------
29
+ # SIDEBAR
30
  # ---------------------------------------------------
 
 
31
  model_option = st.sidebar.selectbox(
32
  "Select Whisper Model",
33
+ ["base"] # Force base for stability
34
  )
35
 
36
  output_mode = st.sidebar.radio(
 
38
  ["Roman Urdu", "English"]
39
  )
40
 
 
 
 
 
41
  # ---------------------------------------------------
42
+ # LOAD MODEL (FORCE CPU)
43
  # ---------------------------------------------------
@st.cache_resource
def load_model(model_size="base"):
    """Load a Whisper model on the CPU and keep it cached across reruns.

    Args:
        model_size: Name of the Whisper checkpoint to load. Defaults to
            "base", which is what existing zero-argument callers get.

    Returns:
        The model object returned by ``whisper.load_model``.
    """
    # device="cpu" is forced on purpose: this build targets CPU-only hosts.
    return whisper.load_model(model_size, device="cpu")
47
 
48
  # ---------------------------------------------------
49
+ # CLEAN TEXT
50
  # ---------------------------------------------------
 
 
 
 
 
def clean_text(text, filler_words=("um", "hmm", "acha", "matlab", "uh")):
    """Strip spoken filler words from a transcript and normalise whitespace.

    Args:
        text: Raw transcript text. ``None`` or an empty string yields "".
        filler_words: Whole words to remove, matched case-insensitively.
            NOTE(review): "matlab" is treated as a filler, so a lecture that
            mentions the MATLAB software will lose that word -- confirm this
            is intended.

    Returns:
        The cleaned text on a single line, with no space left dangling in
        front of punctuation.
    """
    if not text:
        return ""

    alternatives = "|".join(re.escape(word) for word in filler_words)
    if alternatives:
        # Also swallow a comma glued to the filler ("um, so" -> " so") so the
        # removal does not leave a stray ", ," behind.
        text = re.sub(
            rf"\b(?:{alternatives})\b,?", "", text, flags=re.IGNORECASE
        )

    text = re.sub(r"\s+", " ", text).strip()
    # A filler removed right before punctuation leaves "word ." -> "word."
    return re.sub(r"\s+([,.!?;:])", r"\1", text)
57
 
58
+ # ---------------------------------------------------
59
+ # ROMAN URDU
60
+ # ---------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
def convert_to_roman_urdu(text):
    """Replace a small set of common Urdu words with Roman Urdu spellings.

    Only whole words are replaced; an Urdu word that merely contains one of
    the mapped words as a substring is left untouched, so the output never
    mixes scripts inside a single word.

    Args:
        text: Transcript text, possibly containing Urdu script.

    Returns:
        The text with the mapped words romanised. Empty or ``None`` input
        yields "".
    """
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
    }
    if not text:
        return ""

    # Longest alternatives first so a longer word always wins over a shorter
    # one that is its prefix; lookarounds act as Unicode-aware word edges.
    ordered = sorted(replacements, key=len, reverse=True)
    pattern = r"(?<!\w)(?:" + "|".join(map(re.escape, ordered)) + r")(?!\w)"
    return re.sub(pattern, lambda match: replacements[match.group(0)], text)
71
 
72
+ # ---------------------------------------------------
73
+ # EXCEL EXPORT
74
+ # ---------------------------------------------------
# Excel allows at most 32,767 characters in one cell; openpyxl truncates
# anything longer, which would silently drop the tail of a long transcript.
_EXCEL_CELL_CHAR_LIMIT = 32767


def _chunk_for_cells(text, limit=_EXCEL_CELL_CHAR_LIMIT):
    """Split text into pieces of at most `limit` chars, breaking at spaces."""
    chunks = []
    remaining = text
    while len(remaining) > limit:
        cut = remaining.rfind(" ", 0, limit + 1)
        if cut <= 0:
            # No usable space inside the window: fall back to a hard cut.
            cut = limit
        chunks.append(remaining[:cut])
        remaining = remaining[cut:].lstrip(" ")
    if remaining or not chunks:
        chunks.append(remaining)
    return chunks


def create_excel(text):
    """Build an in-memory .xlsx workbook containing the transcript.

    The sheet has a bold "Transcription" header in A1 followed by the text.
    Text longer than a single Excel cell can hold is spread over consecutive
    rows instead of being cut off.

    Args:
        text: The transcript to export. ``None`` is written as an empty cell.

    Returns:
        A ``BytesIO`` positioned at offset 0, ready to be downloaded.
    """
    wb = Workbook()
    ws = wb.active
    ws.append(["Transcription"])
    ws["A1"].font = Font(bold=True)

    body = "" if text is None else str(text)
    for chunk in _chunk_for_cells(body):
        ws.append([chunk])

    buffer = BytesIO()
    wb.save(buffer)
    buffer.seek(0)
    return buffer
86
 
87
+ # ---------------------------------------------------
88
+ # WORD EXPORT
89
+ # ---------------------------------------------------
def create_word(text):
    """Build an in-memory .docx file holding the transcript.

    The document consists of a level-1 heading followed by one paragraph
    with the given text.

    Args:
        text: The transcript to place in the document body.

    Returns:
        A ``BytesIO`` rewound to offset 0 containing the saved document.
    """
    document = Document()
    document.add_heading("Lecture Transcription", level=1)
    document.add_paragraph(text)

    stream = BytesIO()
    document.save(stream)
    stream.seek(0)
    return stream
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  # ---------------------------------------------------
101
  # FILE UPLOADER
102
  # ---------------------------------------------------
def _transcribe_upload(upload):
    """Convert an uploaded audio file to WAV and return Whisper's raw text.

    The temporary WAV file is always deleted, even when conversion or
    transcription fails.
    """
    # Lower-case the extension so "LECTURE.MP3" is decoded like "lecture.mp3".
    ext = os.path.splitext(upload.name)[1].lstrip(".").lower()

    # st.audio() may already have consumed the stream; rewind before decoding.
    upload.seek(0)
    audio = AudioSegment.from_file(upload, format=ext or None)

    # Reserve a path, then close the handle before exporting: writing to a
    # still-open NamedTemporaryFile fails on Windows.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    tmp.close()
    try:
        audio.export(tmp.name, format="wav")
        with st.spinner("Transcribing safely on CPU..."):
            return load_model().transcribe(tmp.name)["text"]
    finally:
        os.remove(tmp.name)


uploaded_file = st.file_uploader(
    "Upload Lecture (.mp3, .wav, .m4a, .aac)",
    type=["mp3", "wav", "m4a", "aac"]
)

if uploaded_file:

    try:
        st.audio(uploaded_file)

        # Streamlit reruns the whole script on every widget interaction
        # (including a click on a download button). Remember the transcript
        # for the current file so it is not transcribed again on each rerun.
        file_key = (uploaded_file.name, uploaded_file.size)
        cached = st.session_state.get("transcription_cache")
        if cached is None or cached[0] != file_key:
            cached = (file_key, _transcribe_upload(uploaded_file))
            st.session_state["transcription_cache"] = cached

        text = cached[1]
        cleaned = clean_text(text)

        # NOTE(review): "English" only skips the romanisation step; the audio
        # is not translated. Confirm whether translation is expected here.
        if output_mode == "Roman Urdu":
            cleaned = convert_to_roman_urdu(cleaned)

        st.success("Transcription Completed ✅")

        st.text_area("Output", cleaned, height=300)

        excel_file = create_excel(cleaned)
        word_file = create_word(cleaned)

        col1, col2 = st.columns(2)

        with col1:
            st.download_button(
                "Download Excel",
                excel_file,
                "RecToText.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )

        with col2:
            st.download_button(
                "Download Word",
                word_file,
                "RecToText.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )

    except Exception as e:
        # Top-level UI boundary: show the failure instead of crashing the app.
        st.error("Processing Error Occurred.")
        st.exception(e)