MiakOnline committed on
Commit
eebed8e
·
verified ·
1 Parent(s): 9a53ebf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -62
app.py CHANGED
@@ -6,124 +6,219 @@ import time
6
  from pydub import AudioSegment
7
  from faster_whisper import WhisperModel
8
  from openpyxl import Workbook
 
9
  from docx import Document
 
10
  from io import BytesIO
11
 
 
 
 
12
  st.set_page_config(page_title="RecToText Pro", layout="wide")
13
 
14
- st.title("🎤 RecToText Pro - Stable Production Version")
15
- st.caption("Long Audio Safe | No Transformer Errors")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- # -------------------------------
18
- # LOAD MODEL (INT8 CPU SAFE)
19
- # -------------------------------
 
 
 
 
20
  @st.cache_resource
21
- def load_model():
22
- return WhisperModel("base", device="cpu", compute_type="int8")
23
 
24
- model = load_model()
25
 
26
- # -------------------------------
27
- # CLEAN TEXT
28
- # -------------------------------
29
  def clean_text(text):
30
- filler = ["um", "hmm", "acha", "matlab", "uh"]
31
- pattern = r'\b(?:' + '|'.join(filler) + r')\b'
32
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
33
- return re.sub(r'\s+', ' ', text).strip()
 
 
 
 
 
 
 
 
 
 
34
 
35
- # -------------------------------
36
- # ROMAN URDU
37
- # -------------------------------
38
- def convert_to_roman(text):
 
 
39
  replacements = {
40
  "ہے": "hai",
41
  "میں": "main",
42
  "اور": "aur",
43
- "کیا": "kya"
 
 
44
  }
45
- for k, v in replacements.items():
46
- text = text.replace(k, v)
47
  return text
48
 
49
- # -------------------------------
50
  # EXPORT EXCEL
51
- # -------------------------------
52
- def export_excel(text):
53
  wb = Workbook()
54
  ws = wb.active
55
- ws.append(["Lecture Transcription"])
56
- ws.append([text])
 
 
 
 
 
 
 
 
 
 
 
 
57
  buffer = BytesIO()
58
  wb.save(buffer)
59
  buffer.seek(0)
60
  return buffer
61
 
62
- # -------------------------------
63
  # EXPORT WORD
64
- # -------------------------------
65
- def export_word(text):
66
  doc = Document()
67
- doc.add_heading("Lecture Transcription", level=1)
68
- doc.add_paragraph(text)
 
 
 
 
 
 
69
  buffer = BytesIO()
70
  doc.save(buffer)
71
  buffer.seek(0)
72
  return buffer
73
 
74
- # -------------------------------
75
- # FILE UPLOADER
76
- # -------------------------------
77
- uploaded = st.file_uploader(
78
- "Upload Audio (.mp3, .wav, .m4a, .aac)",
79
  type=["mp3", "wav", "m4a", "aac"]
80
  )
81
 
82
- output_mode = st.radio("Output Format", ["English", "Roman Urdu"])
83
 
84
- if uploaded:
85
  try:
86
- st.audio(uploaded)
87
 
88
- # Convert to WAV
89
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
90
- ext = uploaded.name.split(".")[-1]
91
- audio = AudioSegment.from_file(uploaded, format=ext)
92
  audio.export(tmp.name, format="wav")
93
- temp_path = tmp.name
94
 
95
- start = time.time()
96
 
97
- with st.spinner("Transcribing safely..."):
98
- segments, info = model.transcribe(temp_path)
99
 
100
- text = ""
101
- for segment in segments:
102
- text += segment.text + " "
103
 
104
- os.remove(temp_path)
 
105
 
106
- text = clean_text(text)
 
 
107
 
108
- if output_mode == "Roman Urdu":
109
- text = convert_to_roman(text)
110
 
111
- st.success("Transcription Completed ")
112
- st.text_area("Output", text, height=300)
113
 
114
- excel_file = export_excel(text)
115
- word_file = export_word(text)
 
116
 
117
  col1, col2 = st.columns(2)
118
 
119
  with col1:
120
- st.download_button("Download Excel", excel_file, "RecToText.xlsx")
 
121
 
122
  with col2:
123
- st.download_button("Download Word", word_file, "RecToText.docx")
 
 
 
 
 
 
 
124
 
125
- st.write(f"Processing Time: {round(time.time()-start,2)} sec")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  except Exception as e:
128
- st.error("Error Occurred")
129
- st.exception(e)
 
 
 
 
6
  from pydub import AudioSegment
7
  from faster_whisper import WhisperModel
8
  from openpyxl import Workbook
9
+ from openpyxl.styles import Font
10
  from docx import Document
11
+ from docx.shared import Pt
12
  from io import BytesIO
13
 
14
+ # -----------------------------------------------------
15
+ # PAGE CONFIG
16
+ # -----------------------------------------------------
17
  st.set_page_config(page_title="RecToText Pro", layout="wide")
18
 
19
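+ # Add top padding to the main container (CSS only; the upload size limit is governed by Streamlit's server.maxUploadSize setting, not by this style block)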
20
+ st.markdown("""
21
+ <style>
22
+ .block-container {padding-top: 2rem;}
23
+ </style>
24
+ """, unsafe_allow_html=True)
25
+
26
+ # -----------------------------------------------------
27
+ # HEADER
28
+ # -----------------------------------------------------
29
+ st.title("🎤 RecToText Pro – Intelligent Lecture Transcriber")
30
+ st.caption("Upload Lecture | AI Transcription | Excel & Word Export")
31
+
32
+ # -----------------------------------------------------
33
+ # SIDEBAR CONTROLS
34
+ # -----------------------------------------------------
35
+ st.sidebar.header("⚙️ Settings")
36
+
37
+ model_size = st.sidebar.selectbox(
38
+ "Whisper Model",
39
+ ["base", "small"]
40
+ )
41
+
42
+ output_format = st.sidebar.radio(
43
+ "Output Format",
44
+ ["English", "Roman Urdu"]
45
+ )
46
 
47
+ if st.sidebar.button("🧹 Clear Session"):
48
+ st.session_state.clear()
49
+ st.rerun()
50
+
51
+ # -----------------------------------------------------
52
+ # LOAD WHISPER MODEL (CPU INT8 OPTIMIZED)
53
+ # -----------------------------------------------------
@st.cache_resource
def load_model(size):
    """Create the faster-whisper model for the given model size.

    The function is decorated with ``st.cache_resource`` so that Streamlit
    can hand back an already-built model for a size it has seen before
    rather than constructing a new one on every script rerun.

    Args:
        size: Whisper model size name passed straight to ``WhisperModel``
            (the sidebar offers ``"base"`` and ``"small"``).

    Returns:
        A ``WhisperModel`` configured for CPU with int8 compute.
    """
    whisper = WhisperModel(size, device="cpu", compute_type="int8")
    return whisper
57
 
58
+ model = load_model(model_size)
59
 
60
+ # -----------------------------------------------------
61
+ # TEXT PROCESSING FUNCTIONS
62
+ # -----------------------------------------------------
def clean_text(text):
    """Strip filler words from a transcript and group it into paragraphs.

    Filler words ("um", "hmm", "acha", "matlab", "uh") are removed as whole
    words, case-insensitively. The whitespace in front of a filler and a
    comma directly after it are removed with it, so that "Um, hello" becomes
    "hello" and "here um." becomes "here." instead of leaving a stray comma
    or a space before the full stop.

    The remaining text is whitespace-normalised, split into sentences after
    ".", "!" or "?", and every five sentences are joined into one paragraph.

    Args:
        text: Raw transcript text.

    Returns:
        The cleaned text with paragraphs separated by a blank line
        ("\\n\\n"). An empty or filler-only input yields "".
    """
    filler_words = ["um", "hmm", "acha", "matlab", "uh"]
    # \s* and ,? take the filler's leading gap and trailing comma with it;
    # \b keeps words that merely contain a filler ("Column") intact.
    pattern = r'\s*\b(?:' + '|'.join(filler_words) + r')\b,?'
    text = re.sub(pattern, "", text, flags=re.IGNORECASE)
    text = re.sub(r'\s+', ' ', text).strip()

    if not text:
        return ""

    sentences = re.split(r'(?<=[.!?]) +', text)
    sentences_per_paragraph = 5
    paragraphs = [
        " ".join(sentences[start:start + sentences_per_paragraph]).strip()
        for start in range(0, len(sentences), sentences_per_paragraph)
    ]

    return "\n\n".join(paragraphs)
83
+
def convert_to_roman_urdu(text):
    """Replace a small set of common Urdu words with Roman Urdu spellings.

    Only whole words are replaced. A listed word that occurs inside a longer
    word is left untouched, so the result never contains a word that is half
    Latin and half Urdu script. Words not in the table are returned as-is.

    Args:
        text: Text that may contain Urdu-script words.

    Returns:
        ``text`` with every stand-alone occurrence of a listed word replaced
        by its Roman Urdu spelling.
    """
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
        "کی": "ki",
        "کا": "ka"
    }
    # Longest alternatives first so a word is tried before its own prefix.
    alternatives = "|".join(
        re.escape(word)
        for word in sorted(replacements, key=len, reverse=True)
    )
    # For str patterns \w is Unicode-aware, so these lookarounds act as word
    # boundaries for Urdu letters as well as Latin ones.
    pattern = r'(?<!\w)(?:' + alternatives + r')(?!\w)'
    return re.sub(pattern, lambda match: replacements[match.group(0)], text)
96
 
97
+ # -----------------------------------------------------
98
  # EXPORT EXCEL
99
+ # -----------------------------------------------------
def export_excel(segments):
    """Build an in-memory .xlsx workbook with one row per segment.

    The sheet is titled "Transcription" and starts with a bold header row
    (Timestamp, Original Text, Cleaned Text). Each segment contributes its
    start/end times rounded to two decimals, its stripped text, and that
    text passed through ``clean_text``.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and
            ``text`` attributes.

    Returns:
        A ``BytesIO`` containing the saved workbook, rewound to offset 0.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Transcription"

    header_row = ["Timestamp", "Original Text", "Cleaned Text"]
    sheet.append(header_row)
    for column_index, _ in enumerate(header_row, start=1):
        sheet.cell(row=1, column=column_index).font = Font(bold=True)

    for item in segments:
        begin = round(item.start, 2)
        finish = round(item.end, 2)
        raw = item.text.strip()
        sheet.append([f"{begin} - {finish}", raw, clean_text(raw)])

    output = BytesIO()
    workbook.save(output)
    output.seek(0)
    return output
121
 
122
+ # -----------------------------------------------------
123
  # EXPORT WORD
124
+ # -----------------------------------------------------
def export_word(title, cleaned_text):
    """Build an in-memory .docx document from paragraph-separated text.

    The document gets a level-1 heading, an empty spacer paragraph, and then
    one paragraph per "\\n\\n"-separated chunk of ``cleaned_text``, each
    with 12pt of space after it.

    Args:
        title: Heading text for the document.
        cleaned_text: Body text whose paragraphs are separated by a blank
            line ("\\n\\n").

    Returns:
        A ``BytesIO`` containing the saved document, rewound to offset 0.
    """
    document = Document()
    document.add_heading(title, level=1)
    document.add_paragraph("")

    for block in cleaned_text.split("\n\n"):
        document.add_paragraph(block).paragraph_format.space_after = Pt(12)

    output = BytesIO()
    document.save(output)
    output.seek(0)
    return output
139
 
140
+ # -----------------------------------------------------
141
+ # FILE UPLOADER (200MB SUPPORT)
142
+ # -----------------------------------------------------
# Main flow: upload a recording, convert it to WAV, transcribe it, show the
# raw and cleaned text, and offer Excel / Word downloads.
uploaded_file = st.file_uploader(
    "Upload Lecture Recording (Max 200MB) – MP3, WAV, M4A, AAC",
    type=["mp3", "wav", "m4a", "aac"]
)

if uploaded_file:
    try:
        st.audio(uploaded_file)

        # Only reserve a path here and release the handle straight away, so
        # pydub is not writing to a file this process still holds open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            temp_audio_path = tmp.name

        try:
            ext = uploaded_file.name.split(".")[-1]
            # Read from the beginning regardless of earlier consumers.
            uploaded_file.seek(0)
            audio = AudioSegment.from_file(uploaded_file, format=ext)
            # export() returns the output file handle; close it so the file
            # can be deleted afterwards.
            audio.export(temp_audio_path, format="wav").close()

            start_time = time.time()

            with st.spinner("🔄 Transcribing... Please wait"):
                segments, info = model.transcribe(temp_audio_path)
                # `segments` is a lazy generator: the transcription runs
                # while it is consumed, so consume it inside the spinner.
                segment_list = list(segments)
        finally:
            # Remove the temporary WAV even when conversion or
            # transcription fails.
            os.remove(temp_audio_path)

        full_text = "".join(segment.text + " " for segment in segment_list)

        cleaned_text = clean_text(full_text)

        if output_format == "Roman Urdu":
            cleaned_text = convert_to_roman_urdu(cleaned_text)

        word_count = len(cleaned_text.split())
        processing_time = round(time.time() - start_time, 2)
        detected_language = info.language

        col1, col2 = st.columns(2)

        # The two text areas need distinct, non-empty labels: with identical
        # labels and identical contents they would be the same widget to
        # Streamlit. The labels are hidden because the subheaders already
        # title each column.
        with col1:
            st.subheader("📜 Raw Transcription")
            st.text_area(
                "Raw transcription",
                full_text,
                height=300,
                label_visibility="collapsed",
            )

        with col2:
            st.subheader(" Clean Story Format")
            st.text_area(
                "Clean story format",
                cleaned_text,
                height=300,
                label_visibility="collapsed",
            )

        st.divider()

        st.write(f"**Detected Language:** {detected_language}")
        st.write(f"**Word Count:** {word_count}")
        st.write(f"**Processing Time:** {processing_time} sec")

        excel_file = export_excel(segment_list)
        word_file = export_word("Lecture Transcription", cleaned_text)

        colA, colB = st.columns(2)

        # NOTE(review): clicking a download button reruns the script, which
        # presumably repeats the whole transcription - consider keeping the
        # result in st.session_state.
        with colA:
            st.download_button(
                "📥 Download Excel (.xlsx)",
                data=excel_file,
                file_name="RecToText_Transcription.xlsx"
            )

        with colB:
            st.download_button(
                "📄 Download Word (.docx)",
                data=word_file,
                file_name="RecToText_Lecture.docx"
            )

        st.success("✅ Transcription Completed Successfully!")

    except Exception as e:
        # Top-level boundary of the app: show the failure in the UI.
        st.error("Error Occurred During Processing")
        st.exception(e)
222
+
223
+ st.markdown("---")
224
+ st.markdown("<center>Developed with ❤️ using Whisper & Streamlit</center>", unsafe_allow_html=True)