Spaces:

MiakOnline
/

RecToTextPro

Sleeping

App Files Community

MiakOnline committed 12 days ago

Commit

88bdbd2

verified ·

1 Parent(s): ea817e9

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -72

app.py CHANGED Viewed

@@ -13,22 +13,28 @@ from io import BytesIO
 st.set_page_config(page_title="RecToText Pro", layout="wide")
-st.title("🎤 RecToText Pro – Intelligent Lecture Transcriber")
-st.caption("Strict English / Roman Urdu Output | No Script Mixing")
-# -------------------------------------------------------
-# LOAD WHISPER MODEL (CPU INT8 OPTIMIZED)
-# -------------------------------------------------------
 @st.cache_resource
 def load_model():
     return WhisperModel("base", device="cpu", compute_type="int8")
 model = load_model()
-# -------------------------------------------------------
-# STRICT ROMAN URDU TRANSLITERATION
-# -------------------------------------------------------
-def transliterate_to_roman(text):
     replacements = {
         "ہے": "hai",
         "میں": "main",
@@ -41,30 +47,28 @@ def transliterate_to_roman(text):
         "پر": "par",
         "نہیں": "nahin"
     }
-    for urdu, roman in replacements.items():
-        text = text.replace(urdu, roman)
-    # remove any remaining non-ASCII characters
-    text = re.sub(r'[^\x00-\x7F]+', '', text)
-    return text
-# -------------------------------------------------------
-# CLEAN + STRUCTURE STORY
-# -------------------------------------------------------
-def clean_and_structure(text):
-    filler = ["um", "hmm", "acha", "matlab", "uh"]
-    pattern = r'\b(?:' + '|'.join(filler) + r')\b'
-    text = re.sub(pattern, "", text, flags=re.IGNORECASE)
-    text = re.sub(r'\s+', ' ', text).strip()
     sentences = re.split(r'(?<=[.!?]) +', text)
     paragraphs = []
     temp = ""
     for i, sentence in enumerate(sentences):
         temp += sentence + " "
-        if (i + 1) % 5 == 0:
             paragraphs.append(temp.strip())
             temp = ""
@@ -73,27 +77,23 @@ def clean_and_structure(text):
     return "\n\n".join(paragraphs)
-# -------------------------------------------------------
-# AUDIO CHUNKING (30 SEC SAFE)
-# -------------------------------------------------------
-def chunk_audio(audio_path):
-    audio = AudioSegment.from_wav(audio_path)
-    chunk_length = 30 * 1000
     chunks = []
-    for i in range(0, len(audio), chunk_length):
-        chunks.append(audio[i:i + chunk_length])
     return chunks
-# -------------------------------------------------------
 # EXPORT EXCEL
-# -------------------------------------------------------
 def export_excel(text):
     wb = Workbook()
     ws = wb.active
-    ws.title = "Transcription"
     ws.append(["Lecture Transcription"])
     ws["A1"].font = Font(bold=True)
     ws.append([text])
@@ -103,12 +103,12 @@ def export_excel(text):
     buffer.seek(0)
     return buffer
-# -------------------------------------------------------
 # EXPORT WORD
-# -------------------------------------------------------
-def export_word(title, text):
     doc = Document()
-    doc.add_heading(title, level=1)
     paragraphs = text.split("\n\n")
     for para in paragraphs:
@@ -120,11 +120,18 @@ def export_word(title, text):
     buffer.seek(0)
     return buffer
-# -------------------------------------------------------
 # FILE UPLOADER
-# -------------------------------------------------------
 uploaded = st.file_uploader(
-    "Upload Lecture Recording (MP3, WAV, M4A, AAC) – Max 200MB",
     type=["mp3", "wav", "m4a", "aac"]
 )
@@ -134,7 +141,6 @@ if uploaded:
     try:
         st.audio(uploaded)
-        # Convert to WAV
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             ext = uploaded.name.split(".")[-1]
             audio = AudioSegment.from_file(uploaded, format=ext)
@@ -142,7 +148,6 @@ if uploaded:
             temp_path = tmp.name
         start_time = time.time()
         chunks = chunk_audio(temp_path)
         full_text = ""
@@ -158,47 +163,43 @@ if uploaded:
         os.remove(temp_path)
         if output_mode == "Roman Urdu":
-            full_text = transliterate_to_roman(full_text)
         else:
             full_text = re.sub(r'[^\x00-\x7F]+', '', full_text)
-        structured_text = clean_and_structure(full_text)
-        word_count = len(structured_text.split())
-        processing_time = round(time.time() - start_time, 2)
-        col1, col2 = st.columns(2)
-        with col1:
-            st.subheader("Raw Transcription")
-            st.text_area("", full_text, height=300)
-        with col2:
-            st.subheader("Clean Story Format")
-            st.text_area("", structured_text, height=300)
-        st.divider()
         st.write(f"Word Count: {word_count}")
         st.write(f"Processing Time: {processing_time} sec")
-        excel_file = export_excel(structured_text)
-        word_file = export_word("Lecture Transcription", structured_text)
-        colA, colB = st.columns(2)
-        with colA:
-            st.download_button("Download Excel (.xlsx)", excel_file, "RecToText.xlsx")
-        with colB:
-            st.download_button("Download Word (.docx)", word_file, "RecToText.docx")
-        st.success("Complete Clean Story Generated Successfully.")
     except Exception as e:
         st.error("Processing Error")
         st.exception(e)
 st.markdown("---")
-st.markdown("<center>Developed with ❤️ using Whisper & Streamlit</center>", unsafe_allow_html=True)

 st.set_page_config(page_title="RecToText Pro", layout="wide")
+st.title("🎤 RecToText Pro – AI Polished Edition")
+st.caption("Professional Lecture Transcriber | Clean Story | Grammar Polished")
+# --------------------------------------------------
+# SESSION STATE
+# --------------------------------------------------
+if "processed_text" not in st.session_state:
+    st.session_state.processed_text = None
+# --------------------------------------------------
+# LOAD MODEL
+# --------------------------------------------------
 @st.cache_resource
 def load_model():
     return WhisperModel("base", device="cpu", compute_type="int8")
 model = load_model()
+# --------------------------------------------------
+# STRICT ROMAN URDU
+# --------------------------------------------------
+def transliterate(text):
     replacements = {
         "ہے": "hai",
         "میں": "main",
         "پر": "par",
         "نہیں": "nahin"
     }
+    for k, v in replacements.items():
+        text = text.replace(k, v)
+    return re.sub(r'[^\x00-\x7F]+', '', text)
+# --------------------------------------------------
+# AI STYLE POLISHING (RULE BASED SAFE)
+# --------------------------------------------------
+def polish_text(text):
+    text = re.sub(r'\s+', ' ', text).strip()
     sentences = re.split(r'(?<=[.!?]) +', text)
     paragraphs = []
     temp = ""
     for i, sentence in enumerate(sentences):
+        sentence = sentence.strip().capitalize()
+        if not sentence.endswith((".", "!", "?")):
+            sentence += "."
         temp += sentence + " "
+        if (i + 1) % 4 == 0:
             paragraphs.append(temp.strip())
             temp = ""
     return "\n\n".join(paragraphs)
+# --------------------------------------------------
+# AUDIO CHUNKING
+# --------------------------------------------------
+def chunk_audio(path):
+    audio = AudioSegment.from_wav(path)
     chunks = []
+    chunk_len = 30 * 1000
+    for i in range(0, len(audio), chunk_len):
+        chunks.append(audio[i:i + chunk_len])
     return chunks
+# --------------------------------------------------
 # EXPORT EXCEL
+# --------------------------------------------------
 def export_excel(text):
     wb = Workbook()
     ws = wb.active
     ws.append(["Lecture Transcription"])
     ws["A1"].font = Font(bold=True)
     ws.append([text])
     buffer.seek(0)
     return buffer
+# --------------------------------------------------
 # EXPORT WORD
+# --------------------------------------------------
+def export_word(text):
     doc = Document()
+    doc.add_heading("Lecture Transcription", level=1)
     paragraphs = text.split("\n\n")
     for para in paragraphs:
     buffer.seek(0)
     return buffer
+# --------------------------------------------------
+# CLEAR BUTTON
+# --------------------------------------------------
+if st.sidebar.button("🧹 Clear All"):
+    st.session_state.processed_text = None
+    st.rerun()
+# --------------------------------------------------
 # FILE UPLOADER
+# --------------------------------------------------
 uploaded = st.file_uploader(
+    "Upload Lecture (MP3, WAV, M4A, AAC) – Max 200MB",
     type=["mp3", "wav", "m4a", "aac"]
 )
     try:
         st.audio(uploaded)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             ext = uploaded.name.split(".")[-1]
             audio = AudioSegment.from_file(uploaded, format=ext)
             temp_path = tmp.name
         start_time = time.time()
         chunks = chunk_audio(temp_path)
         full_text = ""
         os.remove(temp_path)
+        # Strict output control
         if output_mode == "Roman Urdu":
+            full_text = transliterate(full_text)
         else:
             full_text = re.sub(r'[^\x00-\x7F]+', '', full_text)
+        polished = polish_text(full_text)
+        st.session_state.processed_text = polished
+        word_count = len(polished.split())
+        processing_time = round(time.time() - start_time, 2)
+        st.subheader("✨ Clean AI Polished Story")
+        st.text_area("", polished, height=350)
         st.write(f"Word Count: {word_count}")
         st.write(f"Processing Time: {processing_time} sec")
+        excel_file = export_excel(polished)
+        word_file = export_word(polished)
+        col1, col2 = st.columns(2)
+        with col1:
+            if st.download_button("Download Excel (.xlsx)", excel_file):
+                st.session_state.processed_text = None
+        with col2:
+            if st.download_button("Download Word (.docx)", word_file):
+                st.session_state.processed_text = None
+        st.success("Story Generated Successfully.")
     except Exception as e:
         st.error("Processing Error")
         st.exception(e)
 st.markdown("---")
+st.markdown("<center>RecToText Pro – AI Polished Edition</center>", unsafe_allow_html=True)