Spaces:

afaqalinagra
/

PASHTO-ASR-MODEL

Sleeping

App Files Files Community

afaqalinagra committed on Jan 23

Commit

bbbf3e8

verified ·

1 Parent(s): dae1e6f

Create app.py

Browse files

Files changed (1) hide show

app.py +162 -0

app.py ADDED Viewed

	@@ -0,0 +1,162 @@

+import gradio as gr
+import torch
+import numpy as np
+import librosa
+from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+# =========================
+# MODEL CONFIGURATION (checkpoint, device and dtype used by the loader below)
+# =========================
+MODEL_ID = "afaqalinagra/PASHTO-ASR-MODEL"  # identifier passed to both from_pretrained() calls
+DEVICE = "cpu"  # device the model is moved to; input features are sent to the same device
+DTYPE = torch.float32  # passed as torch_dtype when loading the model
+# =========================
+# LOAD MODEL & PROCESSOR (module-level: runs once, when this file is imported or executed)
+# =========================
+processor = AutoProcessor.from_pretrained(MODEL_ID)  # used for feature extraction and for decoding token ids
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    MODEL_ID,
+    torch_dtype=DTYPE,
+    low_cpu_mem_usage=True
+)
+model.to(DEVICE)
+model.eval()  # evaluation mode: the app only calls generate(), never trains
+# =========================
+# ASR FUNCTION
+# =========================
+def transcribe(audio):
+    if audio is None:
+        return "No audio provided."
+    sample_rate, waveform = audio
+    # Convert stereo to mono
+    if waveform.ndim > 1:
+        waveform = np.mean(waveform, axis=1)
+    # Ensure float32
+    waveform = waveform.astype(np.float32)
+    # Resample to 16kHz (mandatory for ASR)
+    if sample_rate != 16000:
+        waveform = librosa.resample(
+            waveform,
+            orig_sr=sample_rate,
+            target_sr=16000
+        )
+    inputs = processor(
+        waveform,
+        sampling_rate=16000,
+        return_tensors="pt"
+    )
+    with torch.no_grad():
+        generated_ids = model.generate(
+            inputs.input_features.to(DEVICE)
+        )
+    transcription = processor.batch_decode(
+        generated_ids,
+        skip_special_tokens=True
+    )[0]
+    return transcription.strip()
+# =========================
+# CUSTOM GLASS-MORPHISM CSS (stylesheet string handed to gr.Blocks(css=...))
+# =========================
+custom_css = """
+body {
+    background: linear-gradient(135deg, #1e1e2f, #2b5876);
+    font-family: Inter, system-ui, -apple-system, BlinkMacSystemFont;
+}
+.glass-card {
+    background: rgba(255, 255, 255, 0.15);
+    backdrop-filter: blur(16px);
+    -webkit-backdrop-filter: blur(16px);
+    border-radius: 22px;
+    padding: 28px;
+    border: 1px solid rgba(255, 255, 255, 0.25);
+    box-shadow: 0 10px 40px rgba(0, 0, 0, 0.35);
+}
+h1, h2, h3, label {
+    color: white !important;
+}
+.gr-button {
+    background: linear-gradient(135deg, #ff7a18, #ffb347);
+    border-radius: 14px;
+    font-weight: 600;
+    color: black;
+    height: 48px;
+}
+.gr-textbox textarea {
+    background: rgba(255, 255, 255, 0.25);
+    color: white;
+    border-radius: 12px;
+}
+.gr-audio {
+    background: rgba(255, 255, 255, 0.18);
+    border-radius: 14px;
+}
+"""  # NOTE(review): .glass-card is attached via elem_classes in the UI; .gr-button/.gr-textbox/.gr-audio presumably rely on Gradio's generated class names - confirm they match the installed Gradio version
+# =========================
+# GRADIO UI
+# =========================
+with gr.Blocks(css=custom_css) as demo:
+    with gr.Column(elem_classes=["glass-card"]):
+        gr.Markdown(
+            """
+            <h1 style="text-align:center;">Pashto Speech-to-Text</h1>
+            <h3 style="text-align:center;">Powered by Custom ASR Model</h3>
+            <p style="text-align:center; color:white;">
+            Upload or record Pashto audio and receive accurate transcription.
+            </p>
+            """
+        )
+        with gr.Row():
+            with gr.Column(scale=1):
+                audio_input = gr.Audio(
+                    sources=["upload", "microphone"],
+                    type="numpy",
+                    label="Upload or Record Pashto Audio"
+                )
+                transcribe_btn = gr.Button("Transcribe")
+            with gr.Column(scale=1):
+                output_text = gr.Textbox(
+                    label="Transcription Output",
+                    lines=8,
+                    placeholder="Transcribed text will appear here..."
+                )
+        transcribe_btn.click(
+            fn=transcribe,
+            inputs=audio_input,
+            outputs=output_text
+        )
+# =========================
+# LAUNCH
+# =========================
+demo.launch()