Spaces:

MiakOnline
/

RecToTextPro

Sleeping

App Files Files Community

MiakOnline committed on Mar 14

Commit

eebed8e

verified ·

1 Parent(s): 9a53ebf

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -62

app.py CHANGED Viewed

@@ -6,124 +6,219 @@ import time
 from pydub import AudioSegment
 from faster_whisper import WhisperModel
 from openpyxl import Workbook
 from docx import Document
 from io import BytesIO
 st.set_page_config(page_title="RecToText Pro", layout="wide")
-st.title("🎤 RecToText Pro - Stable Production Version")
-st.caption("Long Audio Safe | No Transformer Errors")
-# -------------------------------
-# LOAD MODEL (INT8 CPU SAFE)
-# -------------------------------
 @st.cache_resource
-def load_model():
-    return WhisperModel("base", device="cpu", compute_type="int8")
-model = load_model()
-# -------------------------------
-# CLEAN TEXT
-# -------------------------------
 def clean_text(text):
-    filler = ["um", "hmm", "acha", "matlab", "uh"]
-    pattern = r'\b(?:' + '|'.join(filler) + r')\b'
     text = re.sub(pattern, "", text, flags=re.IGNORECASE)
-    return re.sub(r'\s+', ' ', text).strip()
-# -------------------------------
-# ROMAN URDU
-# -------------------------------
-def convert_to_roman(text):
     replacements = {
         "ہے": "hai",
         "میں": "main",
         "اور": "aur",
-        "کیا": "kya"
     }
-    for k, v in replacements.items():
-        text = text.replace(k, v)
     return text
-# -------------------------------
 # EXPORT EXCEL
-# -------------------------------
-def export_excel(text):
     wb = Workbook()
     ws = wb.active
-    ws.append(["Lecture Transcription"])
-    ws.append([text])
     buffer = BytesIO()
     wb.save(buffer)
     buffer.seek(0)
     return buffer
-# -------------------------------
 # EXPORT WORD
-# -------------------------------
-def export_word(text):
     doc = Document()
-    doc.add_heading("Lecture Transcription", level=1)
-    doc.add_paragraph(text)
     buffer = BytesIO()
     doc.save(buffer)
     buffer.seek(0)
     return buffer
-# -------------------------------
-# FILE UPLOADER
-# -------------------------------
-uploaded = st.file_uploader(
-    "Upload Audio (.mp3, .wav, .m4a, .aac)",
     type=["mp3", "wav", "m4a", "aac"]
 )
-output_mode = st.radio("Output Format", ["English", "Roman Urdu"])
-if uploaded:
     try:
-        st.audio(uploaded)
-        # Convert to WAV
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-            ext = uploaded.name.split(".")[-1]
-            audio = AudioSegment.from_file(uploaded, format=ext)
             audio.export(tmp.name, format="wav")
-            temp_path = tmp.name
-        start = time.time()
-        with st.spinner("Transcribing safely..."):
-            segments, info = model.transcribe(temp_path)
-            text = ""
-            for segment in segments:
-                text += segment.text + " "
-        os.remove(temp_path)
-        text = clean_text(text)
-        if output_mode == "Roman Urdu":
-            text = convert_to_roman(text)
-        st.success("Transcription Completed ✅")
-        st.text_area("Output", text, height=300)
-        excel_file = export_excel(text)
-        word_file = export_word(text)
         col1, col2 = st.columns(2)
         with col1:
-            st.download_button("Download Excel", excel_file, "RecToText.xlsx")
         with col2:
-            st.download_button("Download Word", word_file, "RecToText.docx")
-        st.write(f"Processing Time: {round(time.time()-start,2)} sec")
     except Exception as e:
-        st.error("Error Occurred")
-        st.exception(e)

 from pydub import AudioSegment
 from faster_whisper import WhisperModel
 from openpyxl import Workbook
+from openpyxl.styles import Font
 from docx import Document
+from docx.shared import Pt
 from io import BytesIO
+# -----------------------------------------------------
+# PAGE CONFIG
+# -----------------------------------------------------
 st.set_page_config(page_title="RecToText Pro", layout="wide")
+# Custom CSS: set the main container's top padding (this does not change the upload size limit)
+st.markdown("""
+    <style>
+    .block-container {padding-top: 2rem;}
+    </style>
+""", unsafe_allow_html=True)
+# -----------------------------------------------------
+# HEADER
+# -----------------------------------------------------
+st.title("🎤 RecToText Pro – Intelligent Lecture Transcriber")
+st.caption("Upload Lecture | AI Transcription | Excel & Word Export")
+# -----------------------------------------------------
+# SIDEBAR CONTROLS
+# -----------------------------------------------------
+st.sidebar.header("⚙️ Settings")
+model_size = st.sidebar.selectbox(
+    "Whisper Model",
+    ["base", "small"]
+)
+output_format = st.sidebar.radio(
+    "Output Format",
+    ["English", "Roman Urdu"]
+)
+if st.sidebar.button("🧹 Clear Session"):
+    st.session_state.clear()
+    st.rerun()
+# -----------------------------------------------------
+# LOAD WHISPER MODEL (CPU INT8 OPTIMIZED)
+# -----------------------------------------------------
 @st.cache_resource
+def load_model(size):
+    return WhisperModel(size, device="cpu", compute_type="int8")
+model = load_model(model_size)
+# -----------------------------------------------------
+# TEXT PROCESSING FUNCTIONS
+# -----------------------------------------------------
 def clean_text(text):
+    filler_words = ["um", "hmm", "acha", "matlab", "uh"]
+    pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
     text = re.sub(pattern, "", text, flags=re.IGNORECASE)
+    text = re.sub(r'\s+', ' ', text).strip()
+    sentences = re.split(r'(?<=[.!?]) +', text)
+    paragraphs = []
+    temp = ""
+    for i, sentence in enumerate(sentences):
+        temp += sentence + " "
+        if (i + 1) % 5 == 0:
+            paragraphs.append(temp.strip())
+            temp = ""
+    if temp:
+        paragraphs.append(temp.strip())
+    return "\n\n".join(paragraphs)
+def convert_to_roman_urdu(text):
     replacements = {
         "ہے": "hai",
         "میں": "main",
         "اور": "aur",
+        "کیا": "kya",
+        "کی": "ki",
+        "کا": "ka"
     }
+    for urdu, roman in replacements.items():
+        text = text.replace(urdu, roman)
     return text
+# -----------------------------------------------------
 # EXPORT EXCEL
+# -----------------------------------------------------
+def export_excel(segments):
     wb = Workbook()
     ws = wb.active
+    ws.title = "Transcription"
+    headers = ["Timestamp", "Original Text", "Cleaned Text"]
+    ws.append(headers)
+    for col in range(1, 4):
+        ws.cell(row=1, column=col).font = Font(bold=True)
+    for segment in segments:
+        timestamp = f"{round(segment.start,2)} - {round(segment.end,2)}"
+        original = segment.text.strip()
+        cleaned = clean_text(original)
+        ws.append([timestamp, original, cleaned])
     buffer = BytesIO()
     wb.save(buffer)
     buffer.seek(0)
     return buffer
+# -----------------------------------------------------
 # EXPORT WORD
+# -----------------------------------------------------
+def export_word(title, cleaned_text):
     doc = Document()
+    doc.add_heading(title, level=1)
+    doc.add_paragraph("")
+    paragraphs = cleaned_text.split("\n\n")
+    for para in paragraphs:
+        p = doc.add_paragraph(para)
+        p.paragraph_format.space_after = Pt(12)
     buffer = BytesIO()
     doc.save(buffer)
     buffer.seek(0)
     return buffer
+# -----------------------------------------------------
+# FILE UPLOADER (200MB SUPPORT)
+# -----------------------------------------------------
+uploaded_file = st.file_uploader(
+    "Upload Lecture Recording (Max 200MB) – MP3, WAV, M4A, AAC",
     type=["mp3", "wav", "m4a", "aac"]
 )
+if uploaded_file:
     try:
+        st.audio(uploaded_file)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+            ext = uploaded_file.name.split(".")[-1]
+            audio = AudioSegment.from_file(uploaded_file, format=ext)
             audio.export(tmp.name, format="wav")
+            temp_audio_path = tmp.name
+        start_time = time.time()
+        with st.spinner("🔄 Transcribing... Please wait"):
+            segments, info = model.transcribe(temp_audio_path)
+        os.remove(temp_audio_path)
+        full_text = ""
+        segment_list = []
+        for segment in segments:
+            full_text += segment.text + " "
+            segment_list.append(segment)
+        cleaned_text = clean_text(full_text)
+        if output_format == "Roman Urdu":
+            cleaned_text = convert_to_roman_urdu(cleaned_text)
+        word_count = len(cleaned_text.split())
+        processing_time = round(time.time() - start_time, 2)
+        detected_language = info.language
         col1, col2 = st.columns(2)
         with col1:
+            st.subheader("📜 Raw Transcription")
+            st.text_area("", full_text, height=300)
         with col2:
+            st.subheader("✨ Clean Story Format")
+            st.text_area("", cleaned_text, height=300)
+        st.divider()
+        st.write(f"**Detected Language:** {detected_language}")
+        st.write(f"**Word Count:** {word_count}")
+        st.write(f"**Processing Time:** {processing_time} sec")
+        excel_file = export_excel(segment_list)
+        word_file = export_word("Lecture Transcription", cleaned_text)
+        colA, colB = st.columns(2)
+        with colA:
+            st.download_button(
+                "📥 Download Excel (.xlsx)",
+                data=excel_file,
+                file_name="RecToText_Transcription.xlsx"
+            )
+        with colB:
+            st.download_button(
+                "📄 Download Word (.docx)",
+                data=word_file,
+                file_name="RecToText_Lecture.docx"
+            )
+        st.success("✅ Transcription Completed Successfully!")
     except Exception as e:
+        st.error("❌ Error Occurred During Processing")
+        st.exception(e)
+st.markdown("---")
+st.markdown("<center>Developed with ❤️ using Whisper & Streamlit</center>", unsafe_allow_html=True)