MiakOnline committed on
Commit
9a53ebf
·
verified ·
1 Parent(s): a53d6d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -33
app.py CHANGED
@@ -4,42 +4,37 @@ import os
4
  import re
5
  import time
6
  from pydub import AudioSegment
7
- from transformers import pipeline
8
  from openpyxl import Workbook
9
  from docx import Document
10
  from io import BytesIO
11
 
12
  st.set_page_config(page_title="RecToText Pro", layout="wide")
13
 
14
- st.title("🎤 RecToText Pro - Stable Long Audio Edition")
15
- st.caption("Long Lecture Supported | Word + Excel Export")
16
 
17
- # --------------------------------------------------
18
- # LOAD MODEL (CPU SAFE)
19
- # --------------------------------------------------
20
  @st.cache_resource
21
- def load_asr():
22
- return pipeline(
23
- "automatic-speech-recognition",
24
- model="openai/whisper-base",
25
- device=-1,
26
- return_timestamps=True # FIX FOR LONG AUDIO
27
- )
28
 
29
- asr = load_asr()
30
 
31
- # --------------------------------------------------
32
  # CLEAN TEXT
33
- # --------------------------------------------------
34
  def clean_text(text):
35
- filler = ["um", "hmm", "acha", "matlab"]
36
  pattern = r'\b(?:' + '|'.join(filler) + r')\b'
37
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
38
  return re.sub(r'\s+', ' ', text).strip()
39
 
40
- # --------------------------------------------------
41
  # ROMAN URDU
42
- # --------------------------------------------------
43
  def convert_to_roman(text):
44
  replacements = {
45
  "ہے": "hai",
@@ -51,9 +46,9 @@ def convert_to_roman(text):
51
  text = text.replace(k, v)
52
  return text
53
 
54
- # --------------------------------------------------
55
  # EXPORT EXCEL
56
- # --------------------------------------------------
57
  def export_excel(text):
58
  wb = Workbook()
59
  ws = wb.active
@@ -64,9 +59,9 @@ def export_excel(text):
64
  buffer.seek(0)
65
  return buffer
66
 
67
- # --------------------------------------------------
68
  # EXPORT WORD
69
- # --------------------------------------------------
70
  def export_word(text):
71
  doc = Document()
72
  doc.add_heading("Lecture Transcription", level=1)
@@ -76,9 +71,9 @@ def export_word(text):
76
  buffer.seek(0)
77
  return buffer
78
 
79
- # --------------------------------------------------
80
  # FILE UPLOADER
81
- # --------------------------------------------------
82
  uploaded = st.file_uploader(
83
  "Upload Audio (.mp3, .wav, .m4a, .aac)",
84
  type=["mp3", "wav", "m4a", "aac"]
@@ -90,6 +85,7 @@ if uploaded:
90
  try:
91
  st.audio(uploaded)
92
 
 
93
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
94
  ext = uploaded.name.split(".")[-1]
95
  audio = AudioSegment.from_file(uploaded, format=ext)
@@ -98,16 +94,14 @@ if uploaded:
98
 
99
  start = time.time()
100
 
101
- with st.spinner("Transcribing long audio safely..."):
102
- result = asr(temp_path)
103
 
104
- os.remove(temp_path)
 
 
105
 
106
- # FIX: Extract text from chunks safely
107
- if isinstance(result, dict) and "chunks" in result:
108
- text = " ".join([chunk["text"] for chunk in result["chunks"]])
109
- else:
110
- text = result["text"]
111
 
112
  text = clean_text(text)
113
 
 
4
  import re
5
  import time
6
  from pydub import AudioSegment
7
+ from faster_whisper import WhisperModel
8
  from openpyxl import Workbook
9
  from docx import Document
10
  from io import BytesIO
11
 
12
  st.set_page_config(page_title="RecToText Pro", layout="wide")
13
 
14
+ st.title("🎤 RecToText Pro - Stable Production Version")
15
+ st.caption("Long Audio Safe | No Transformer Errors")
16
 
17
+ # -------------------------------
18
+ # LOAD MODEL (INT8 CPU SAFE)
19
+ # -------------------------------
20
  @st.cache_resource
21
+ def load_model():
22
+ return WhisperModel("base", device="cpu", compute_type="int8")
 
 
 
 
 
23
 
24
+ model = load_model()
25
 
26
+ # -------------------------------
27
  # CLEAN TEXT
28
+ # -------------------------------
29
  def clean_text(text):
30
+ filler = ["um", "hmm", "acha", "matlab", "uh"]
31
  pattern = r'\b(?:' + '|'.join(filler) + r')\b'
32
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
33
  return re.sub(r'\s+', ' ', text).strip()
34
 
35
+ # -------------------------------
36
  # ROMAN URDU
37
+ # -------------------------------
38
  def convert_to_roman(text):
39
  replacements = {
40
  "ہے": "hai",
 
46
  text = text.replace(k, v)
47
  return text
48
 
49
+ # -------------------------------
50
  # EXPORT EXCEL
51
+ # -------------------------------
52
  def export_excel(text):
53
  wb = Workbook()
54
  ws = wb.active
 
59
  buffer.seek(0)
60
  return buffer
61
 
62
+ # -------------------------------
63
  # EXPORT WORD
64
+ # -------------------------------
65
  def export_word(text):
66
  doc = Document()
67
  doc.add_heading("Lecture Transcription", level=1)
 
71
  buffer.seek(0)
72
  return buffer
73
 
74
+ # -------------------------------
75
  # FILE UPLOADER
76
+ # -------------------------------
77
  uploaded = st.file_uploader(
78
  "Upload Audio (.mp3, .wav, .m4a, .aac)",
79
  type=["mp3", "wav", "m4a", "aac"]
 
85
  try:
86
  st.audio(uploaded)
87
 
88
+ # Convert to WAV
89
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
90
  ext = uploaded.name.split(".")[-1]
91
  audio = AudioSegment.from_file(uploaded, format=ext)
 
94
 
95
  start = time.time()
96
 
97
+ with st.spinner("Transcribing safely..."):
98
+ segments, info = model.transcribe(temp_path)
99
 
100
+ text = ""
101
+ for segment in segments:
102
+ text += segment.text + " "
103
 
104
+ os.remove(temp_path)
 
 
 
 
105
 
106
  text = clean_text(text)
107