Spaces:

MiakOnline
/

RecToTextPro

Sleeping

App Files Community

MiakOnline committed on Mar 14

Commit

55e1272

verified ·

1 Parent(s): 25248e0

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -137

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import tempfile
 import os
 import time
 import re
 from pydub import AudioSegment
 from openpyxl import Workbook
 from openpyxl.styles import Font
@@ -21,14 +22,15 @@ st.set_page_config(
     page_icon="🎤"
 )
 # ---------------------------------------------------
-# SIDEBAR SETTINGS
 # ---------------------------------------------------
-st.sidebar.title("⚙️ Settings")
 model_option = st.sidebar.selectbox(
     "Select Whisper Model",
-    ["base", "small"]
 )
 output_mode = st.sidebar.radio(
@@ -36,185 +38,120 @@ output_mode = st.sidebar.radio(
     ["Roman Urdu", "English"]
 )
-if st.sidebar.button("🧹 Clear Session"):
-    st.session_state.clear()
-    st.rerun()
 # ---------------------------------------------------
-# HEADER
 # ---------------------------------------------------
-st.markdown("<h1 style='text-align:center;'>🎤 RecToText Pro</h1>", unsafe_allow_html=True)
-st.markdown("<p style='text-align:center;'>AI Lecture Transcriber with Excel & Word Export</p>", unsafe_allow_html=True)
-st.divider()
 # ---------------------------------------------------
-# FUNCTIONS
 # ---------------------------------------------------
-@st.cache_resource
-def load_model(model_size):
-    return whisper.load_model(model_size)
 def clean_text(text):
-    filler_words = ["um", "hmm", "acha", "matlab", "uh", "huh"]
     pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
     text = re.sub(pattern, '', text, flags=re.IGNORECASE)
     text = re.sub(r'\s+', ' ', text).strip()
-    # Better paragraph formatting
-    sentences = re.split(r'(?<=[.!?]) +', text)
-    paragraphs = []
-    temp_para = ""
-    for i, sentence in enumerate(sentences):
-        temp_para += sentence + " "
-        if (i + 1) % 5 == 0:
-            paragraphs.append(temp_para.strip())
-            temp_para = ""
-    if temp_para:
-        paragraphs.append(temp_para.strip())
-    return "\n\n".join(paragraphs)
 def convert_to_roman_urdu(text):
     replacements = {
         "ہے": "hai",
         "میں": "main",
         "اور": "aur",
-        "کیا": "kya",
-        "آپ": "aap",
-        "کی": "ki",
-        "کا": "ka"
     }
     for urdu, roman in replacements.items():
         text = text.replace(urdu, roman)
     return text
-def create_excel(segments):
     wb = Workbook()
     ws = wb.active
-    ws.title = "Transcription"
-    headers = ["Timestamp", "Transcribed Text", "Cleaned Output"]
-    ws.append(headers)
-    for col in range(1, 4):
-        ws.cell(row=1, column=col).font = Font(bold=True)
-    for seg in segments:
-        timestamp = f"{round(seg['start'],2)} - {round(seg['end'],2)}"
-        raw_text = seg["text"]
-        cleaned = clean_text(raw_text)
-        ws.append([timestamp, raw_text, cleaned])
-    excel_buffer = BytesIO()
-    wb.save(excel_buffer)
-    excel_buffer.seek(0)
-    return excel_buffer
-def create_word_document(cleaned_text):
     doc = Document()
-    title = doc.add_heading("Lecture Transcription", level=1)
-    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
-    doc.add_paragraph("")
-    paragraphs = cleaned_text.split("\n\n")
-    for para in paragraphs:
-        p = doc.add_paragraph(para)
-        p.paragraph_format.space_after = Pt(12)
-    word_buffer = BytesIO()
-    doc.save(word_buffer)
-    word_buffer.seek(0)
-    return word_buffer
 # ---------------------------------------------------
 # FILE UPLOADER
 # ---------------------------------------------------
 uploaded_file = st.file_uploader(
-    "Upload Lecture Recording (.mp3, .wav, .m4a, .aac)",
     type=["mp3", "wav", "m4a", "aac"]
 )
 if uploaded_file:
-    st.audio(uploaded_file)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-        file_extension = uploaded_file.name.split(".")[-1]
-        audio = AudioSegment.from_file(uploaded_file, format=file_extension)
-        audio.export(tmp.name, format="wav")
-        temp_audio_path = tmp.name
-    st.info("Loading Whisper model...")
-    model = load_model(model_option)
-    start_time = time.time()
-    with st.spinner("Transcribing... Please wait."):
-        result = model.transcribe(temp_audio_path)
-    end_time = time.time()
-    os.remove(temp_audio_path)
-    detected_lang = result.get("language", "Unknown")
-    segments = result["segments"]
-    full_text = result["text"]
-    cleaned_text = clean_text(full_text)
-    if output_mode == "Roman Urdu":
-        cleaned_text = convert_to_roman_urdu(cleaned_text)
-    word_count = len(cleaned_text.split())
-    processing_time = round(end_time - start_time, 2)
-    col1, col2 = st.columns(2)
-    with col1:
-        st.subheader("📜 Raw Transcription")
-        st.text_area("", full_text, height=350)
-    with col2:
-        st.subheader("✨ Clean Story Format")
-        st.text_area("", cleaned_text, height=350)
-    st.divider()
-    st.write(f"**Detected Language:** {detected_lang}")
-    st.write(f"**Word Count:** {word_count}")
-    st.write(f"**Processing Time:** {processing_time} seconds")
-    # ----------------------------
-    # EXPORT FILES
-    # ----------------------------
-    excel_file = create_excel(segments)
-    word_file = create_word_document(cleaned_text)
-    colA, colB = st.columns(2)
-    with colA:
-        st.download_button(
-            label="📥 Download Excel (.xlsx)",
-            data=excel_file,
-            file_name="RecToText_Transcription.xlsx",
-            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
-        )
-    with colB:
-        st.download_button(
-            label="📄 Download Word (.docx)",
-            data=word_file,
-            file_name="RecToText_Lecture.docx",
-            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-        )
-st.divider()
-st.markdown(
-    "<p style='text-align:center; font-size:12px;'>RecToText Pro | Whisper + Streamlit Powered</p>",
-    unsafe_allow_html=True
-)

 import os
 import time
 import re
+import torch
 from pydub import AudioSegment
 from openpyxl import Workbook
 from openpyxl.styles import Font
     page_icon="🎤"
 )
+st.title("🎤 RecToText Pro")
+st.caption("Stable Production Version | CPU Optimized")
 # ---------------------------------------------------
+# SIDEBAR
 # ---------------------------------------------------
 model_option = st.sidebar.selectbox(
     "Select Whisper Model",
+    ["base"]  # Force base for stability
 )
 output_mode = st.sidebar.radio(
     ["Roman Urdu", "English"]
 )
 # ---------------------------------------------------
+# LOAD MODEL (FORCE CPU)
 # ---------------------------------------------------
+@st.cache_resource
+def load_model():
+    return whisper.load_model("base", device="cpu")
 # ---------------------------------------------------
+# CLEAN TEXT
 # ---------------------------------------------------
 def clean_text(text):
+    filler_words = ["um", "hmm", "acha", "matlab", "uh"]
     pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
     text = re.sub(pattern, '', text, flags=re.IGNORECASE)
     text = re.sub(r'\s+', ' ', text).strip()
+    return text
+# ---------------------------------------------------
+# ROMAN URDU
+# ---------------------------------------------------
 def convert_to_roman_urdu(text):
     replacements = {
         "ہے": "hai",
         "میں": "main",
         "اور": "aur",
+        "کیا": "kya"
     }
     for urdu, roman in replacements.items():
         text = text.replace(urdu, roman)
     return text
+# ---------------------------------------------------
+# EXCEL EXPORT
+# ---------------------------------------------------
+def create_excel(text):
     wb = Workbook()
     ws = wb.active
+    ws.append(["Transcription"])
+    ws["A1"].font = Font(bold=True)
+    ws.append([text])
+    buffer = BytesIO()
+    wb.save(buffer)
+    buffer.seek(0)
+    return buffer
+# ---------------------------------------------------
+# WORD EXPORT
+# ---------------------------------------------------
+def create_word(text):
     doc = Document()
+    doc.add_heading("Lecture Transcription", level=1)
+    doc.add_paragraph(text)
+    buffer = BytesIO()
+    doc.save(buffer)
+    buffer.seek(0)
+    return buffer
 # ---------------------------------------------------
 # FILE UPLOADER
 # ---------------------------------------------------
 uploaded_file = st.file_uploader(
+    "Upload Lecture (.mp3, .wav, .m4a, .aac)",
     type=["mp3", "wav", "m4a", "aac"]
 )
 if uploaded_file:
+    try:
+        st.audio(uploaded_file)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+            ext = uploaded_file.name.split(".")[-1]
+            audio = AudioSegment.from_file(uploaded_file, format=ext)
+            audio.export(tmp.name, format="wav")
+            temp_path = tmp.name
+        model = load_model()
+        with st.spinner("Transcribing safely on CPU..."):
+            result = model.transcribe(temp_path)
+        os.remove(temp_path)
+        text = result["text"]
+        cleaned = clean_text(text)
+        if output_mode == "Roman Urdu":
+            cleaned = convert_to_roman_urdu(cleaned)
+        st.success("Transcription Completed ✅")
+        st.text_area("Output", cleaned, height=300)
+        excel_file = create_excel(cleaned)
+        word_file = create_word(cleaned)
+        col1, col2 = st.columns(2)
+        with col1:
+            st.download_button(
+                "Download Excel",
+                excel_file,
+                "RecToText.xlsx"
+            )
+        with col2:
+            st.download_button(
+                "Download Word",
+                word_file,
+                "RecToText.docx"
+            )
+    except Exception as e:
+        st.error("Processing Error Occurred.")
+        st.exception(e)