Spaces:

MiakOnline
/

RecToTextPro

Sleeping

App Files Community

MiakOnline committed on Mar 14

Commit

9a53ebf

verified ·

1 Parent(s): a53d6d2

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -33

app.py CHANGED Viewed

@@ -4,42 +4,37 @@ import os
 import re
 import time
 from pydub import AudioSegment
-from transformers import pipeline
 from openpyxl import Workbook
 from docx import Document
 from io import BytesIO
 st.set_page_config(page_title="RecToText Pro", layout="wide")
-st.title("🎤 RecToText Pro - Stable Long Audio Edition")
-st.caption("Long Lecture Supported | Word + Excel Export")
-# --------------------------------------------------
-# LOAD MODEL (CPU SAFE)
-# --------------------------------------------------
 @st.cache_resource
-def load_asr():
-    return pipeline(
-        "automatic-speech-recognition",
-        model="openai/whisper-base",
-        device=-1,
-        return_timestamps=True  # FIX FOR LONG AUDIO
-    )
-asr = load_asr()
-# --------------------------------------------------
 # CLEAN TEXT
-# --------------------------------------------------
 def clean_text(text):
-    filler = ["um", "hmm", "acha", "matlab"]
     pattern = r'\b(?:' + '|'.join(filler) + r')\b'
     text = re.sub(pattern, "", text, flags=re.IGNORECASE)
     return re.sub(r'\s+', ' ', text).strip()
-# --------------------------------------------------
 # ROMAN URDU
-# --------------------------------------------------
 def convert_to_roman(text):
     replacements = {
         "ہے": "hai",
@@ -51,9 +46,9 @@ def convert_to_roman(text):
         text = text.replace(k, v)
     return text
-# --------------------------------------------------
 # EXPORT EXCEL
-# --------------------------------------------------
 def export_excel(text):
     wb = Workbook()
     ws = wb.active
@@ -64,9 +59,9 @@ def export_excel(text):
     buffer.seek(0)
     return buffer
-# --------------------------------------------------
 # EXPORT WORD
-# --------------------------------------------------
 def export_word(text):
     doc = Document()
     doc.add_heading("Lecture Transcription", level=1)
@@ -76,9 +71,9 @@ def export_word(text):
     buffer.seek(0)
     return buffer
-# --------------------------------------------------
 # FILE UPLOADER
-# --------------------------------------------------
 uploaded = st.file_uploader(
     "Upload Audio (.mp3, .wav, .m4a, .aac)",
     type=["mp3", "wav", "m4a", "aac"]
@@ -90,6 +85,7 @@ if uploaded:
     try:
         st.audio(uploaded)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             ext = uploaded.name.split(".")[-1]
             audio = AudioSegment.from_file(uploaded, format=ext)
@@ -98,16 +94,14 @@ if uploaded:
         start = time.time()
-        with st.spinner("Transcribing long audio safely..."):
-            result = asr(temp_path)
-        os.remove(temp_path)
-        # FIX: Extract text from chunks safely
-        if isinstance(result, dict) and "chunks" in result:
-            text = " ".join([chunk["text"] for chunk in result["chunks"]])
-        else:
-            text = result["text"]
         text = clean_text(text)

 import re
 import time
 from pydub import AudioSegment
+from faster_whisper import WhisperModel
 from openpyxl import Workbook
 from docx import Document
 from io import BytesIO
 st.set_page_config(page_title="RecToText Pro", layout="wide")
+st.title("🎤 RecToText Pro - Stable Production Version")
+st.caption("Long Audio Safe | No Transformer Errors")
+# -------------------------------
+# LOAD MODEL (INT8 CPU SAFE)
+# -------------------------------
 @st.cache_resource
+def load_model():
     # Construct and return the speech-to-text model used by the app:
     # a WhisperModel for the "base" checkpoint, on CPU, with int8 compute type.
     # NOTE(review): the st.cache_resource decorator presumably makes Streamlit
     # reuse one model instance across script reruns — confirm against the
     # Streamlit caching documentation.
+    return WhisperModel("base", device="cpu", compute_type="int8")
+model = load_model()
+# -------------------------------
 # CLEAN TEXT
+# -------------------------------
 def clean_text(text):
     # Remove spoken filler words from a transcript and normalise its spacing.
     # `text` is passed straight to re.sub, so it must be a str; the cleaned
     # str is returned.
+    filler = ["um", "hmm", "acha", "matlab", "uh"]
     # Build one whole-word alternation, \b(?:um|hmm|...)\b, so a filler that
     # is only part of a longer word (e.g. "um" in "umbrella") is not touched.
     pattern = r'\b(?:' + '|'.join(filler) + r')\b'
     # Matching is case-insensitive, so "Um" and "MATLAB" are removed as well.
     # Only the word itself is deleted; punctuation next to it is left behind.
     text = re.sub(pattern, "", text, flags=re.IGNORECASE)
     # Collapse every whitespace run (including the gaps left by removed
     # fillers) into a single space and trim both ends.
     return re.sub(r'\s+', ' ', text).strip()
+# -------------------------------
 # ROMAN URDU
+# -------------------------------
 def convert_to_roman(text):
     replacements = {
         "ہے": "hai",
         text = text.replace(k, v)
     return text
+# -------------------------------
 # EXPORT EXCEL
+# -------------------------------
 def export_excel(text):
     wb = Workbook()
     ws = wb.active
     buffer.seek(0)
     return buffer
+# -------------------------------
 # EXPORT WORD
+# -------------------------------
 def export_word(text):
     doc = Document()
     doc.add_heading("Lecture Transcription", level=1)
     buffer.seek(0)
     return buffer
+# -------------------------------
 # FILE UPLOADER
+# -------------------------------
 uploaded = st.file_uploader(
     "Upload Audio (.mp3, .wav, .m4a, .aac)",
     type=["mp3", "wav", "m4a", "aac"]
     try:
         st.audio(uploaded)
+        # Convert to WAV
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             ext = uploaded.name.split(".")[-1]
             audio = AudioSegment.from_file(uploaded, format=ext)
         start = time.time()
+        with st.spinner("Transcribing safely..."):
             # transcribe() yields a (segments, info) pair; `info` is not used
             # in the lines visible here.
             # NOTE(review): temp_path is assigned outside this hunk —
             # presumably the temporary .wav written just above; verify.
+            segments, info = model.transcribe(temp_path)
+            text = ""
             # Append each segment's text followed by one space. The trailing
             # space is stripped afterwards by clean_text().
+            for segment in segments:
+                text += segment.text + " "
         # Delete the temporary audio file once the transcript has been built.
         # NOTE(review): this line is not reached if transcription raises, so
         # the temp file would be left on disk — consider try/finally.
+        os.remove(temp_path)
         text = clean_text(text)