MiakOnline committed on
Commit
ddfd8e5
·
verified ·
1 Parent(s): f47f4a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -85
app.py CHANGED
@@ -1,157 +1,130 @@
1
  import streamlit as st
2
- import whisper
3
  import tempfile
4
  import os
5
- import time
6
  import re
 
7
  import torch
8
  from pydub import AudioSegment
 
9
  from openpyxl import Workbook
10
- from openpyxl.styles import Font
11
  from docx import Document
12
- from docx.shared import Pt
13
- from docx.enum.text import WD_ALIGN_PARAGRAPH
14
  from io import BytesIO
15
 
16
- # ---------------------------------------------------
17
- # PAGE CONFIG
18
- # ---------------------------------------------------
19
- st.set_page_config(
20
- page_title="RecToText Pro",
21
- layout="wide",
22
- page_icon="🎤"
23
- )
24
 
25
  st.title("🎤 RecToText Pro")
26
- st.caption("Stable Production Version | CPU Optimized")
27
-
28
- # ---------------------------------------------------
29
- # SIDEBAR
30
- # ---------------------------------------------------
31
- model_option = st.sidebar.selectbox(
32
- "Select Whisper Model",
33
- ["base"] # Force base for stability
34
- )
35
 
36
- output_mode = st.sidebar.radio(
37
- "Output Format",
38
- ["Roman Urdu", "English"]
39
- )
40
-
41
- # ---------------------------------------------------
42
- # LOAD MODEL (FORCE CPU)
43
- # ---------------------------------------------------
44
  @st.cache_resource
45
- def load_model():
46
- return whisper.load_model("base", device="cpu")
47
-
48
- # ---------------------------------------------------
49
- # CLEAN TEXT
50
- # ---------------------------------------------------
 
 
 
 
 
 
51
  def clean_text(text):
52
- filler_words = ["um", "hmm", "acha", "matlab", "uh"]
53
- pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
54
- text = re.sub(pattern, '', text, flags=re.IGNORECASE)
55
- text = re.sub(r'\s+', ' ', text).strip()
56
- return text
57
-
58
- # ---------------------------------------------------
59
- # ROMAN URDU
60
- # ---------------------------------------------------
61
- def convert_to_roman_urdu(text):
62
  replacements = {
63
  "ہے": "hai",
64
  "میں": "main",
65
  "اور": "aur",
66
  "کیا": "kya"
67
  }
68
- for urdu, roman in replacements.items():
69
- text = text.replace(urdu, roman)
70
  return text
71
 
72
- # ---------------------------------------------------
73
  # EXCEL EXPORT
74
- # ---------------------------------------------------
75
- def create_excel(text):
76
  wb = Workbook()
77
  ws = wb.active
78
  ws.append(["Transcription"])
79
- ws["A1"].font = Font(bold=True)
80
  ws.append([text])
81
-
82
  buffer = BytesIO()
83
  wb.save(buffer)
84
  buffer.seek(0)
85
  return buffer
86
 
87
- # ---------------------------------------------------
88
  # WORD EXPORT
89
- # ---------------------------------------------------
90
- def create_word(text):
91
  doc = Document()
92
  doc.add_heading("Lecture Transcription", level=1)
93
  doc.add_paragraph(text)
94
-
95
  buffer = BytesIO()
96
  doc.save(buffer)
97
  buffer.seek(0)
98
  return buffer
99
 
100
- # ---------------------------------------------------
101
  # FILE UPLOADER
102
- # ---------------------------------------------------
103
- uploaded_file = st.file_uploader(
104
- "Upload Lecture (.mp3, .wav, .m4a, .aac)",
105
  type=["mp3", "wav", "m4a", "aac"]
106
  )
107
 
108
- if uploaded_file:
109
 
 
110
  try:
111
- st.audio(uploaded_file)
112
 
113
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
114
- ext = uploaded_file.name.split(".")[-1]
115
- audio = AudioSegment.from_file(uploaded_file, format=ext)
116
  audio.export(tmp.name, format="wav")
117
  temp_path = tmp.name
118
 
119
- model = load_model()
120
 
121
- with st.spinner("Transcribing safely on CPU..."):
122
- result = model.transcribe(temp_path)
123
 
124
  os.remove(temp_path)
125
 
126
  text = result["text"]
127
- cleaned = clean_text(text)
128
 
129
  if output_mode == "Roman Urdu":
130
- cleaned = convert_to_roman_urdu(cleaned)
131
 
132
  st.success("Transcription Completed ✅")
 
133
 
134
- st.text_area("Output", cleaned, height=300)
135
-
136
- excel_file = create_excel(cleaned)
137
- word_file = create_word(cleaned)
138
 
139
  col1, col2 = st.columns(2)
140
 
141
  with col1:
142
- st.download_button(
143
- "Download Excel",
144
- excel_file,
145
- "RecToText.xlsx"
146
- )
147
 
148
  with col2:
149
- st.download_button(
150
- "Download Word",
151
- word_file,
152
- "RecToText.docx"
153
- )
154
 
155
  except Exception as e:
156
- st.error("Processing Error Occurred.")
157
  st.exception(e)
 
1
  import streamlit as st
 
2
  import tempfile
3
  import os
 
4
  import re
5
+ import time
6
  import torch
7
  from pydub import AudioSegment
8
+ from transformers import pipeline
9
  from openpyxl import Workbook
 
10
  from docx import Document
 
 
11
  from io import BytesIO
12
 
13
+ st.set_page_config(page_title="RecToText Pro", layout="wide")
 
 
 
 
 
 
 
14
 
15
  st.title("🎤 RecToText Pro")
16
+ st.caption("Stable HuggingFace Build Version")
 
 
 
 
 
 
 
 
17
 
18
+ # -------------------------
19
+ # LOAD MODEL (HF PIPELINE)
20
+ # -------------------------
 
 
 
 
 
21
  @st.cache_resource
22
+ def load_asr():
23
+ return pipeline(
24
+ "automatic-speech-recognition",
25
+ model="openai/whisper-base",
26
+ device=-1 # CPU
27
+ )
28
+
29
+ asr = load_asr()
30
+
31
+ # -------------------------
32
+ # TEXT CLEANING
33
+ # -------------------------
34
  def clean_text(text):
35
+ filler = ["um", "hmm", "acha", "matlab"]
36
+ pattern = r'\b(?:' + '|'.join(filler) + r')\b'
37
+ text = re.sub(pattern, "", text, flags=re.IGNORECASE)
38
+ return re.sub(r'\s+', ' ', text).strip()
39
+
40
+ # -------------------------
41
+ # ROMAN URDU BASIC
42
+ # -------------------------
43
+ def convert_to_roman(text):
 
44
  replacements = {
45
  "ہے": "hai",
46
  "میں": "main",
47
  "اور": "aur",
48
  "کیا": "kya"
49
  }
50
+ for k, v in replacements.items():
51
+ text = text.replace(k, v)
52
  return text
53
 
54
+ # -------------------------
55
  # EXCEL EXPORT
56
+ # -------------------------
57
+ def export_excel(text):
58
  wb = Workbook()
59
  ws = wb.active
60
  ws.append(["Transcription"])
 
61
  ws.append([text])
 
62
  buffer = BytesIO()
63
  wb.save(buffer)
64
  buffer.seek(0)
65
  return buffer
66
 
67
+ # -------------------------
68
  # WORD EXPORT
69
+ # -------------------------
70
+ def export_word(text):
71
  doc = Document()
72
  doc.add_heading("Lecture Transcription", level=1)
73
  doc.add_paragraph(text)
 
74
  buffer = BytesIO()
75
  doc.save(buffer)
76
  buffer.seek(0)
77
  return buffer
78
 
79
+ # -------------------------
80
  # FILE UPLOADER
81
+ # -------------------------
82
+ uploaded = st.file_uploader(
83
+ "Upload Audio (.mp3, .wav, .m4a, .aac)",
84
  type=["mp3", "wav", "m4a", "aac"]
85
  )
86
 
87
+ output_mode = st.radio("Output Format", ["English", "Roman Urdu"])
88
 
89
+ if uploaded:
90
  try:
91
+ st.audio(uploaded)
92
 
93
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
94
+ ext = uploaded.name.split(".")[-1]
95
+ audio = AudioSegment.from_file(uploaded, format=ext)
96
  audio.export(tmp.name, format="wav")
97
  temp_path = tmp.name
98
 
99
+ start = time.time()
100
 
101
+ with st.spinner("Transcribing..."):
102
+ result = asr(temp_path)
103
 
104
  os.remove(temp_path)
105
 
106
  text = result["text"]
107
+ text = clean_text(text)
108
 
109
  if output_mode == "Roman Urdu":
110
+ text = convert_to_roman(text)
111
 
112
  st.success("Transcription Completed ✅")
113
+ st.text_area("Output", text, height=300)
114
 
115
+ excel_file = export_excel(text)
116
+ word_file = export_word(text)
 
 
117
 
118
  col1, col2 = st.columns(2)
119
 
120
  with col1:
121
+ st.download_button("Download Excel", excel_file, "RecToText.xlsx")
 
 
 
 
122
 
123
  with col2:
124
+ st.download_button("Download Word", word_file, "RecToText.docx")
125
+
126
+ st.write(f"Processing Time: {round(time.time()-start,2)} sec")
 
 
127
 
128
  except Exception as e:
129
+ st.error("Error Occurred")
130
  st.exception(e)