Spaces:

MiakOnline
/

RecToTextPro2

Sleeping

App Files Files Community

MiakOnline committed 12 days ago

Commit

644ed40

verified ·

1 Parent(s): c2dad5a

Create app.py

Browse files

Files changed (1) hide show

app.py +250 -0

app.py ADDED Viewed

	@@ -0,0 +1,250 @@

+import streamlit as st
+import whisper
+import tempfile
+import os
+import time
+import re
+from pydub import AudioSegment
+from openpyxl import Workbook
+from openpyxl.styles import Font
+from docx import Document
+from docx.shared import Pt
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from io import BytesIO
+from collections import Counter
+# ---------------------------------------------------
+# PAGE CONFIG
+# ---------------------------------------------------
+st.set_page_config(
+    page_title="RecToText Pro - AI Edition",
+    layout="wide",
+    page_icon="🎤"
+)
+# ---------------------------------------------------
+# SIDEBAR
+# ---------------------------------------------------
+st.sidebar.title("⚙️ Settings")
+model_option = st.sidebar.selectbox(
+    "Select Whisper Model",
+    ["base", "small"]
+)
+output_mode = st.sidebar.radio(
+    "Output Format",
+    ["Roman Urdu", "English"]
+)
+if st.sidebar.button("🧹 Clear Session"):
+    st.session_state.clear()
+    st.rerun()
+# ---------------------------------------------------
+# HEADER
+# ---------------------------------------------------
+st.markdown("<h1 style='text-align:center;'>🎤 RecToText Pro - AI Enhanced</h1>", unsafe_allow_html=True)
+st.markdown("<p style='text-align:center;'>Auto Title | AI Summary | Smart Formatting</p>", unsafe_allow_html=True)
+st.divider()
+# ---------------------------------------------------
+# FUNCTIONS
+# ---------------------------------------------------
+@st.cache_resource
+def load_model(model_size):
+    return whisper.load_model(model_size)
+def clean_text(text):
+    filler_words = ["um", "hmm", "acha", "matlab", "uh", "huh"]
+    pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
+    text = re.sub(pattern, '', text, flags=re.IGNORECASE)
+    text = re.sub(r'\s+', ' ', text).strip()
+    return text
+def convert_to_roman_urdu(text):
+    replacements = {
+        "ہے": "hai",
+        "میں": "main",
+        "اور": "aur",
+        "کیا": "kya",
+        "آپ": "aap",
+        "کی": "ki",
+        "کا": "ka"
+    }
+    for urdu, roman in replacements.items():
+        text = text.replace(urdu, roman)
+    return text
+# -----------------------------
+# AI Title Detection
+# -----------------------------
+def generate_title(text):
+    words = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
+    common_words = Counter(words).most_common(5)
+    keywords = [word.capitalize() for word, _ in common_words[:3]]
+    if keywords:
+        return "Lecture on " + " ".join(keywords)
+    return "Lecture Transcription"
+# -----------------------------
+# AI Summary Generator
+# -----------------------------
+def generate_summary(text):
+    sentences = re.split(r'(?<=[.!?]) +', text)
+    summary = " ".join(sentences[:5])
+    return summary
+# -----------------------------
+# Smart Formatting
+# -----------------------------
+def smart_format(text):
+    sentences = re.split(r'(?<=[.!?]) +', text)
+    formatted = ""
+    for i, sentence in enumerate(sentences):
+        if len(sentence.split()) < 8:
+            formatted += f"\n\n{sentence.upper()}\n"
+        else:
+            formatted += sentence + " "
+    return formatted.strip()
+# -----------------------------
+# Excel Export
+# -----------------------------
+def create_excel(segments):
+    wb = Workbook()
+    ws = wb.active
+    ws.title = "Transcription"
+    headers = ["Timestamp", "Transcribed Text", "Cleaned Output"]
+    ws.append(headers)
+    for col in range(1, 4):
+        ws.cell(row=1, column=col).font = Font(bold=True)
+    for seg in segments:
+        timestamp = f"{round(seg['start'],2)} - {round(seg['end'],2)}"
+        raw_text = seg["text"]
+        cleaned = clean_text(raw_text)
+        ws.append([timestamp, raw_text, cleaned])
+    buffer = BytesIO()
+    wb.save(buffer)
+    buffer.seek(0)
+    return buffer
+# -----------------------------
+# Word Export
+# -----------------------------
+def create_word_document(title, summary, formatted_text):
+    doc = Document()
+    # Title
+    doc.add_heading(title, level=1).alignment = WD_ALIGN_PARAGRAPH.CENTER
+    doc.add_page_break()
+    # Summary Page
+    doc.add_heading("Executive Summary", level=2)
+    doc.add_paragraph(summary)
+    doc.add_page_break()
+    # Main Content
+    doc.add_heading("Full Lecture Content", level=2)
+    paragraphs = formatted_text.split("\n\n")
+    for para in paragraphs:
+        doc.add_paragraph(para).paragraph_format.space_after = Pt(12)
+    buffer = BytesIO()
+    doc.save(buffer)
+    buffer.seek(0)
+    return buffer
+# ---------------------------------------------------
+# FILE UPLOADER
+# ---------------------------------------------------
+uploaded_file = st.file_uploader(
+    "Upload Lecture Recording (.mp3, .wav, .m4a, .aac)",
+    type=["mp3", "wav", "m4a", "aac"]
+)
+if uploaded_file:
+    st.audio(uploaded_file)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        ext = uploaded_file.name.split(".")[-1]
+        audio = AudioSegment.from_file(uploaded_file, format=ext)
+        audio.export(tmp.name, format="wav")
+        temp_audio_path = tmp.name
+    st.info("Loading Whisper model...")
+    model = load_model(model_option)
+    start_time = time.time()
+    with st.spinner("Transcribing..."):
+        result = model.transcribe(temp_audio_path)
+    end_time = time.time()
+    os.remove(temp_audio_path)
+    full_text = result["text"]
+    segments = result["segments"]
+    detected_lang = result.get("language", "Unknown")
+    cleaned_text = clean_text(full_text)
+    if output_mode == "Roman Urdu":
+        cleaned_text = convert_to_roman_urdu(cleaned_text)
+    title = generate_title(cleaned_text)
+    summary = generate_summary(cleaned_text)
+    formatted_text = smart_format(cleaned_text)
+    word_count = len(cleaned_text.split())
+    processing_time = round(end_time - start_time, 2)
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("📜 Raw Transcription")
+        st.text_area("", full_text, height=350)
+    with col2:
+        st.subheader("✨ AI Formatted Version")
+        st.text_area("", formatted_text, height=350)
+    st.divider()
+    st.write(f"**Auto Detected Title:** {title}")
+    st.write(f"**Detected Language:** {detected_lang}")
+    st.write(f"**Word Count:** {word_count}")
+    st.write(f"**Processing Time:** {processing_time} sec")
+    excel_file = create_excel(segments)
+    word_file = create_word_document(title, summary, formatted_text)
+    colA, colB = st.columns(2)
+    with colA:
+        st.download_button(
+            "📥 Download Excel (.xlsx)",
+            data=excel_file,
+            file_name="RecToText_Transcription.xlsx"
+        )
+    with colB:
+        st.download_button(
+            "📄 Download Word (.docx)",
+            data=word_file,
+            file_name="RecToText_AI_Lecture.docx"
+        )
+st.divider()
+st.markdown(
+    "<p style='text-align:center;font-size:12px;'>RecToText Pro AI Edition | Auto Title | Smart Summary | AI Formatting</p>",
+    unsafe_allow_html=True
+)