MiakOnline committed on
Commit
a53d6d2
·
verified ·
1 Parent(s): a270792

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -24
app.py CHANGED
@@ -3,7 +3,6 @@ import tempfile
3
  import os
4
  import re
5
  import time
6
- import torch
7
  from pydub import AudioSegment
8
  from transformers import pipeline
9
  from openpyxl import Workbook
@@ -12,34 +11,35 @@ from io import BytesIO
12
 
13
  st.set_page_config(page_title="RecToText Pro", layout="wide")
14
 
15
- st.title("🎤 RecToText Pro")
16
- st.caption("Stable HuggingFace Build Version")
17
 
18
- # -------------------------
19
- # LOAD MODEL (HF PIPELINE)
20
- # -------------------------
21
  @st.cache_resource
22
  def load_asr():
23
  return pipeline(
24
  "automatic-speech-recognition",
25
  model="openai/whisper-base",
26
- device=-1 # CPU
 
27
  )
28
 
29
  asr = load_asr()
30
 
31
- # -------------------------
32
- # TEXT CLEANING
33
- # -------------------------
34
  def clean_text(text):
35
  filler = ["um", "hmm", "acha", "matlab"]
36
  pattern = r'\b(?:' + '|'.join(filler) + r')\b'
37
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
38
  return re.sub(r'\s+', ' ', text).strip()
39
 
40
- # -------------------------
41
- # ROMAN URDU BASIC
42
- # -------------------------
43
  def convert_to_roman(text):
44
  replacements = {
45
  "ہے": "hai",
@@ -51,22 +51,22 @@ def convert_to_roman(text):
51
  text = text.replace(k, v)
52
  return text
53
 
54
- # -------------------------
55
- # EXCEL EXPORT
56
- # -------------------------
57
  def export_excel(text):
58
  wb = Workbook()
59
  ws = wb.active
60
- ws.append(["Transcription"])
61
  ws.append([text])
62
  buffer = BytesIO()
63
  wb.save(buffer)
64
  buffer.seek(0)
65
  return buffer
66
 
67
- # -------------------------
68
- # WORD EXPORT
69
- # -------------------------
70
  def export_word(text):
71
  doc = Document()
72
  doc.add_heading("Lecture Transcription", level=1)
@@ -76,9 +76,9 @@ def export_word(text):
76
  buffer.seek(0)
77
  return buffer
78
 
79
- # -------------------------
80
  # FILE UPLOADER
81
- # -------------------------
82
  uploaded = st.file_uploader(
83
  "Upload Audio (.mp3, .wav, .m4a, .aac)",
84
  type=["mp3", "wav", "m4a", "aac"]
@@ -98,12 +98,17 @@ if uploaded:
98
 
99
  start = time.time()
100
 
101
- with st.spinner("Transcribing..."):
102
  result = asr(temp_path)
103
 
104
  os.remove(temp_path)
105
 
106
- text = result["text"]
 
 
 
 
 
107
  text = clean_text(text)
108
 
109
  if output_mode == "Roman Urdu":
 
3
  import os
4
  import re
5
  import time
 
6
  from pydub import AudioSegment
7
  from transformers import pipeline
8
  from openpyxl import Workbook
 
11
 
12
  st.set_page_config(page_title="RecToText Pro", layout="wide")
13
 
14
+ st.title("🎤 RecToText Pro - Stable Long Audio Edition")
15
+ st.caption("Long Lecture Supported | Word + Excel Export")
16
 
17
+ # --------------------------------------------------
18
+ # LOAD MODEL (CPU SAFE)
19
+ # --------------------------------------------------
20
  @st.cache_resource
21
  def load_asr():
22
  return pipeline(
23
  "automatic-speech-recognition",
24
  model="openai/whisper-base",
25
+ device=-1,
26
+ return_timestamps=True # FIX FOR LONG AUDIO
27
  )
28
 
29
  asr = load_asr()
30
 
31
+ # --------------------------------------------------
32
+ # CLEAN TEXT
33
+ # --------------------------------------------------
34
  def clean_text(text):
35
  filler = ["um", "hmm", "acha", "matlab"]
36
  pattern = r'\b(?:' + '|'.join(filler) + r')\b'
37
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
38
  return re.sub(r'\s+', ' ', text).strip()
39
 
40
+ # --------------------------------------------------
41
+ # ROMAN URDU
42
+ # --------------------------------------------------
43
  def convert_to_roman(text):
44
  replacements = {
45
  "ہے": "hai",
 
51
  text = text.replace(k, v)
52
  return text
53
 
54
+ # --------------------------------------------------
55
+ # EXPORT EXCEL
56
+ # --------------------------------------------------
57
  def export_excel(text):
58
  wb = Workbook()
59
  ws = wb.active
60
+ ws.append(["Lecture Transcription"])
61
  ws.append([text])
62
  buffer = BytesIO()
63
  wb.save(buffer)
64
  buffer.seek(0)
65
  return buffer
66
 
67
+ # --------------------------------------------------
68
+ # EXPORT WORD
69
+ # --------------------------------------------------
70
  def export_word(text):
71
  doc = Document()
72
  doc.add_heading("Lecture Transcription", level=1)
 
76
  buffer.seek(0)
77
  return buffer
78
 
79
+ # --------------------------------------------------
80
  # FILE UPLOADER
81
+ # --------------------------------------------------
82
  uploaded = st.file_uploader(
83
  "Upload Audio (.mp3, .wav, .m4a, .aac)",
84
  type=["mp3", "wav", "m4a", "aac"]
 
98
 
99
  start = time.time()
100
 
101
+ with st.spinner("Transcribing long audio safely..."):
102
  result = asr(temp_path)
103
 
104
  os.remove(temp_path)
105
 
106
+ # FIX: Extract text from chunks safely
107
+ if isinstance(result, dict) and "chunks" in result:
108
+ text = " ".join([chunk["text"] for chunk in result["chunks"]])
109
+ else:
110
+ text = result["text"]
111
+
112
  text = clean_text(text)
113
 
114
  if output_mode == "Roman Urdu":